Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update HTMLFastPathParser::scanTagName() to return an ElementName #12270

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Source/WebCore/dom/make_names.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1048,6 +1048,8 @@ sub printElementNameHeaderFile
print F "} // namespace ElementNames\n";
print F "\n";
print F "ElementName findElementName(Namespace, const String&);\n";
print F "ElementName findHTMLElementName(Span<const LChar>);\n";
print F "ElementName findHTMLElementName(Span<const UChar>);\n";
print F "TagName tagNameForElement(ElementName);\n";
print F "ElementName elementNameForTag(Namespace, TagName);\n";
print F "const QualifiedName& qualifiedNameForElement(ElementName);\n";
Expand Down Expand Up @@ -1171,6 +1173,16 @@ sub printElementNameCppFile
print F " return findElementFromBuffer(ns, makeSpan(name.characters16(), name.length()));\n";
print F "}\n";
print F "\n";
print F "ElementName findHTMLElementName(Span<const LChar> buffer)\n";
print F "{\n";
print F " return findHTMLElement(buffer);\n";
print F "}\n";
print F "\n";
print F "ElementName findHTMLElementName(Span<const UChar> buffer)\n";
print F "{\n";
print F " return findHTMLElement(buffer);\n";
print F "}\n";
print F "\n";
print F "const QualifiedName& qualifiedNameForElement(ElementName elementName)\n";
print F "{\n";
print F " ASSERT(elementName != ElementName::Unknown);\n";
Expand Down
99 changes: 32 additions & 67 deletions Source/WebCore/html/parser/HTMLDocumentParserFastPath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,36 +103,6 @@ template<class Char> static bool operator==(Span<const Char> span, ASCIILiteral
return WTF::equal(span.data(), s.characters8(), span.size());
}

#if ASSERT_ENABLED
static constexpr bool onlyContainsLowercaseASCIILetters(ASCIILiteral s)
{
for (size_t i = 0; i < s.length(); ++i) {
if (!isASCIILower(s[i]))
return false;
}
return true;
}
#endif // ASSERT_ENABLED

// A hash function that is just good enough to distinguish the supported tagNames. It needs to be
// adapted as soon as we have colliding tagNames. The implementation was chosen to map to a dense
// integer range to allow for compact switch jump-tables. If adding support for a new tag results
// in a collision, then pick a new function that minimizes the number of operations and results
// in a dense integer range.
static constexpr uint32_t tagNameHash(ASCIILiteral s)
{
// The fast-path parser only scans for letters in tagNames.
ASSERT_UNDER_CONSTEXPR_CONTEXT(onlyContainsLowercaseASCIILetters(s));
// This function is called with a null-terminated string literal. The terminator must not be
// used in the hash; length() does not count it, hence the last character is at length() - 1.
return (s[0] + 17 * s[s.length() - 1]) & 63;
}

template<class Char> static constexpr uint32_t tagNameHash(Span<const Char> s)
{
return (s[0] + 17 * s[s.size() - 1]) & 63;
}

template<typename CharacterType> static inline bool isQuoteCharacter(CharacterType c)
{
return c == '"' || c == '\'';
Expand Down Expand Up @@ -334,7 +304,7 @@ class HTMLFastPathParser {
};

struct A : ContainerTag<HTMLAnchorElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "a"_s;
static constexpr ElementName tagName = ElementName::HTML_a;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand All @@ -347,7 +317,7 @@ class HTMLFastPathParser {
};

struct AWithPhrasingContent : ContainsPhrasingContentTag<HTMLAnchorElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "a"_s;
static constexpr ElementName tagName = ElementName::HTML_a;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand All @@ -360,7 +330,7 @@ class HTMLFastPathParser {
};

struct B : ContainsPhrasingContentTag<HTMLElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "b"_s;
static constexpr ElementName tagName = ElementName::HTML_b;

static Ref<HTMLElement> create(Document& document)
{
Expand All @@ -369,19 +339,19 @@ class HTMLFastPathParser {
};

struct Br : VoidTag<HTMLBRElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "br"_s;
static constexpr ElementName tagName = ElementName::HTML_br;
};

struct Button : ContainsPhrasingContentTag<HTMLButtonElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "button"_s;
static constexpr ElementName tagName = ElementName::HTML_button;
};

struct Div : ContainerTag<HTMLDivElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "div"_s;
static constexpr ElementName tagName = ElementName::HTML_div;
};

struct Footer : ContainerTag<HTMLDivElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "footer"_s;
static constexpr ElementName tagName = ElementName::HTML_footer;

static Ref<HTMLElement> create(Document& document)
{
Expand All @@ -390,7 +360,7 @@ class HTMLFastPathParser {
};

struct I : ContainsPhrasingContentTag<HTMLElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "i"_s;
static constexpr ElementName tagName = ElementName::HTML_i;

static Ref<HTMLElement> create(Document& document)
{
Expand All @@ -399,7 +369,7 @@ class HTMLFastPathParser {
};

struct Input : VoidTag<HTMLInputElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "input"_s;
static constexpr ElementName tagName = ElementName::HTML_input;

static Ref<HTMLInputElement> create(Document& document)
{
Expand All @@ -408,15 +378,15 @@ class HTMLFastPathParser {
};

struct Li : ContainerTag<HTMLLIElement, PermittedParents::Special> {
static constexpr ASCIILiteral tagName = "li"_s;
static constexpr ElementName tagName = ElementName::HTML_li;
};

struct Label : ContainsPhrasingContentTag<HTMLLabelElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "label"_s;
static constexpr ElementName tagName = ElementName::HTML_label;
};

struct Option : ContainerTag<HTMLOptionElement, PermittedParents::Special> {
static constexpr ASCIILiteral tagName = "option"_s;
static constexpr ElementName tagName = ElementName::HTML_option;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand All @@ -426,7 +396,7 @@ class HTMLFastPathParser {
};

struct Ol : ContainerTag<HTMLOListElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "ol"_s;
static constexpr ElementName tagName = ElementName::HTML_ol;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand All @@ -435,11 +405,11 @@ class HTMLFastPathParser {
};

struct P : ContainsPhrasingContentTag<HTMLParagraphElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "p"_s;
static constexpr ElementName tagName = ElementName::HTML_p;
};

struct Select : ContainerTag<HTMLSelectElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "select"_s;
static constexpr ElementName tagName = ElementName::HTML_select;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand All @@ -448,11 +418,11 @@ class HTMLFastPathParser {
};

struct Span : ContainsPhrasingContentTag<HTMLSpanElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "span"_s;
static constexpr ElementName tagName = ElementName::HTML_span;
};

struct Strong : ContainsPhrasingContentTag<HTMLElement, PermittedParents::PhrasingOrFlowContent> {
static constexpr ASCIILiteral tagName = "strong"_s;
static constexpr ElementName tagName = ElementName::HTML_strong;

static Ref<HTMLElement> create(Document& document)
{
Expand All @@ -461,7 +431,7 @@ class HTMLFastPathParser {
};

struct Ul : ContainerTag<HTMLUListElement, PermittedParents::FlowContent> {
static constexpr ASCIILiteral tagName = "ul"_s;
static constexpr ElementName tagName = ElementName::HTML_ul;

static RefPtr<Element> parseChild(HTMLFastPathParser& self)
{
Expand Down Expand Up @@ -525,7 +495,7 @@ class HTMLFastPathParser {
}

// Scan a tagName and convert to lowercase if necessary.
CharSpan scanTagName()
ElementName scanTagName()
{
auto* start = m_parsingBuffer.position();
skipWhile<isASCIILower>(m_parsingBuffer);
Expand All @@ -544,13 +514,13 @@ class HTMLFastPathParser {
m_charBuffer.append(c);
}
if (m_parsingBuffer.atEnd() || !isCharAfterTagNameOrAttribute(*m_parsingBuffer))
return didFail(HTMLFastPathResult::FailedParsingTagName, CharSpan { });
return didFail(HTMLFastPathResult::FailedParsingTagName, ElementName::Unknown);
skipWhile<isHTMLSpace>(m_parsingBuffer);
return CharSpan { m_charBuffer.data(), m_charBuffer.size() };
return findHTMLElementName({ m_charBuffer.data(), m_charBuffer.size() });
}
CharSpan result { start, static_cast<size_t>(m_parsingBuffer.position() - start) };
auto tagName = findHTMLElementName({ start, static_cast<size_t>(m_parsingBuffer.position() - start) });
skipWhile<isHTMLSpace>(m_parsingBuffer);
return result;
return tagName;
}

CharSpan scanAttributeName()
Expand Down Expand Up @@ -845,16 +815,16 @@ class HTMLFastPathParser {

template<class... Tags> RefPtr<Element> parseSpecificElements()
{
CharSpan tagName = scanTagName();
auto tagName = scanTagName();
return parseSpecificElements<Tags...>(tagName);
}

template<void* = nullptr> RefPtr<Element> parseSpecificElements(CharSpan)
template<void* = nullptr> RefPtr<Element> parseSpecificElements(ElementName)
{
return didFail(HTMLFastPathResult::FailedParsingSpecificElements, nullptr);
}

template<class Tag, class... OtherTags> RefPtr<Element> parseSpecificElements(CharSpan tagName)
template<class Tag, class... OtherTags> RefPtr<Element> parseSpecificElements(ElementName tagName)
{
if (tagName == Tag::tagName)
return parseElementAfterTagName<Tag>();
Expand All @@ -864,8 +834,6 @@ class HTMLFastPathParser {
template<bool nonPhrasingContent> RefPtr<Element> parseElement()
{
auto tagName = scanTagName();
if (tagName.empty())
return didFail(HTMLFastPathResult::FailedParsingElement, nullptr);

// HTML has complicated rules around auto-closing tags and re-parenting
// DOM nodes. We avoid complications with auto-closing rules by disallowing
Expand All @@ -877,16 +845,13 @@ class HTMLFastPathParser {
//
// If this switch has duplicate cases, then the same element name is listed more than
// once in FOR_EACH_SUPPORTED_TAG.
switch (tagNameHash(tagName)) {
switch (tagName) {
#define TAG_CASE(TagName, TagClassName) \
case tagNameHash(TagInfo::TagClassName::tagName): \
case ElementName::HTML_ ## TagName: \
if (std::is_same_v<typename TagInfo::A, typename TagInfo::TagClassName>) \
goto caseA; \
if constexpr (nonPhrasingContent ? TagInfo::TagClassName::allowedInFlowContent() : TagInfo::TagClassName::allowedInPhrasingOrFlowContent()) { \
/* See comment in parse() for details on why equality is checked here */ \
if (tagName == TagInfo::TagClassName::tagName) \
return parseElementAfterTagName<typename TagInfo::TagClassName>(); \
} \
goto caseA; \
if constexpr (nonPhrasingContent ? TagInfo::TagClassName::allowedInFlowContent() : TagInfo::TagClassName::allowedInPhrasingOrFlowContent()) \
return parseElementAfterTagName<typename TagInfo::TagClassName>(); \
break;

FOR_EACH_SUPPORTED_TAG(TAG_CASE)
Expand All @@ -895,7 +860,7 @@ class HTMLFastPathParser {
caseA:
// <a> tags must not be nested, because HTML parsing would auto-close
// the outer one when encountering a nested one.
if (tagName == TagInfo::A::tagName && !m_insideOfTagA) {
if (!m_insideOfTagA) {
return nonPhrasingContent
? parseElementAfterTagName<typename TagInfo::A>()
: parseElementAfterTagName<typename TagInfo::AWithPhrasingContent>();
Expand Down