Skip to content

Commit

Permalink
Adopt SIMD in attribute and text scanning
Browse files: browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=273977
rdar://127843610

Reviewed by Mark Lam.

This patch integrates SIMD into attribute value and text scanning in the HTML fast-path parser.
We also annotate bailout paths with UNLIKELY / LIKELY branch hints.

* Source/WebCore/html/parser/HTMLDocumentParserFastPath.cpp:
(WebCore::HTMLFastPathParser::parseCompleteInput):
(WebCore::HTMLFastPathParser::scanText):
(WebCore::HTMLFastPathParser::scanTagName):
(WebCore::HTMLFastPathParser::scanAttributeValue):
(WebCore::HTMLFastPathParser::parseChildren):
(WebCore::HTMLFastPathParser::parseAttributes):
(WebCore::HTMLFastPathParser::parseContainerElement):

Canonical link: https://commits.webkit.org/278647@main
  • Loading branch information
Constellation committed May 11, 2024
1 parent c524a27 commit e8401be
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 36 deletions.
17 changes: 8 additions & 9 deletions Source/JavaScriptCore/runtime/LiteralParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -875,24 +875,23 @@ ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lexString(LiteralParserT
constexpr auto quoteMask = SIMD::splat(static_cast<UnsignedType>('"'));
constexpr auto escapeMask = SIMD::splat(static_cast<UnsignedType>('\\'));
constexpr auto controlMask = SIMD::splat(static_cast<UnsignedType>(' '));
for (; m_ptr + (stride - 1) < m_end; m_ptr += stride) {
auto input = SIMD::load(bitwise_cast<const UnsignedType*>(m_ptr));
auto match = [&](auto* cursor) ALWAYS_INLINE_LAMBDA {
auto input = SIMD::load(bitwise_cast<const UnsignedType*>(cursor));
auto quotes = SIMD::equal(input, quoteMask);
auto escapes = SIMD::equal(input, escapeMask);
auto controls = SIMD::lessThan(input, controlMask);
auto mask = SIMD::merge(quotes, SIMD::merge(escapes, controls));
if (auto index = SIMD::findFirstNonZeroIndex(mask)) {
return SIMD::findFirstNonZeroIndex(mask);
};

for (; m_ptr + (stride - 1) < m_end; m_ptr += stride) {
if (auto index = match(m_ptr)) {
m_ptr += index.value();
return;
}
}
if (m_ptr < m_end) {
auto input = SIMD::load(bitwise_cast<const UnsignedType*>(m_end - stride));
auto quotes = SIMD::equal(input, quoteMask);
auto escapes = SIMD::equal(input, escapeMask);
auto controls = SIMD::lessThan(input, controlMask);
auto mask = SIMD::merge(quotes, SIMD::merge(escapes, controls));
if (auto index = SIMD::findFirstNonZeroIndex(mask)) {
if (auto index = match(m_end - stride)) {
m_ptr = m_end - stride + index.value();
return;
}
Expand Down
2 changes: 1 addition & 1 deletion Source/WTF/wtf/text/StringParsingBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class StringParsingBuffer final {

constexpr void setPosition(const CharacterType* position)
{
ASSERT(!m_data.empty());
ASSERT(position <= m_data.data() + m_data.size());
m_data = { position, m_data.data() + m_data.size() };
}

Expand Down
138 changes: 112 additions & 26 deletions Source/WebCore/html/parser/HTMLDocumentParserFastPath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ class HTMLFastPathParser {
template<typename ParentTag> void parseCompleteInput()
{
parseChildren<ParentTag>(m_destinationParent.get());
if (m_parsingBuffer.hasCharactersRemaining())
if (UNLIKELY(m_parsingBuffer.hasCharactersRemaining()))
didFail(HTMLFastPathResult::FailedDidntReachEndOfInput);
}

Expand All @@ -477,21 +477,65 @@ class HTMLFastPathParser {
String scanText()
{
auto* start = m_parsingBuffer.position();
while (m_parsingBuffer.hasCharactersRemaining() && *m_parsingBuffer != '<') {
// '&' indicates escape sequences, '\r' might require
// https://infra.spec.whatwg.org/#normalize-newlines
if (*m_parsingBuffer == '&' || *m_parsingBuffer == '\r') {
m_parsingBuffer.setPosition(start);
return scanEscapedText();
auto* cursor = start;
const auto* end = start + m_parsingBuffer.lengthRemaining();
([&]() ALWAYS_INLINE_LAMBDA {
constexpr size_t stride = 16 / sizeof(CharacterType);
using UnsignedType = std::make_unsigned_t<CharacterType>;
if (static_cast<size_t>(end - cursor) >= stride) {
const auto quoteMask = SIMD::splat(static_cast<UnsignedType>('<'));
const auto escapeMask = SIMD::splat(static_cast<UnsignedType>('&'));
const auto newlineMask = SIMD::splat(static_cast<UnsignedType>('\r'));
const auto zeroMask = SIMD::splat(static_cast<UnsignedType>(0));

auto match = [&](auto* cursor) ALWAYS_INLINE_LAMBDA {
auto input = SIMD::load(bitwise_cast<const UnsignedType*>(cursor));
auto quotes = SIMD::equal(input, quoteMask);
auto escapes = SIMD::equal(input, escapeMask);
auto newlines = SIMD::equal(input, newlineMask);
auto zeros = SIMD::equal(input, zeroMask);
auto mask = SIMD::merge(zeros, SIMD::merge(quotes, SIMD::merge(escapes, newlines)));
return SIMD::findFirstNonZeroIndex(mask);
};

for (; cursor + (stride - 1) < end; cursor += stride) {
if (auto index = match(cursor)) {
cursor += index.value();
return;
}
}
if (cursor < end) {
if (auto index = match(end - stride)) {
cursor = end - stride + index.value();
return;
}
cursor = end;
}
return;
}
if (UNLIKELY(*m_parsingBuffer == '\0'))

for (; cursor != end; ++cursor) {
auto character = *cursor;
if (character == '<' || character == '&' || character == '\r' || character == '\0')
return;
}
}());
m_parsingBuffer.setPosition(cursor);

if (cursor != end) {
if (UNLIKELY(*cursor == '\0'))
return didFail(HTMLFastPathResult::FailedContainsNull, String());

m_parsingBuffer.advance();
if (*cursor == '&' || *cursor == '\r') {
m_parsingBuffer.setPosition(start);
return scanEscapedText();
}
}
unsigned length = m_parsingBuffer.position() - start;

unsigned length = cursor - start;
if (UNLIKELY(length >= Text::defaultLengthLimit))
return didFail(HTMLFastPathResult::FailedBigText, String());

return length ? String({ start, length }) : String();
}

Expand Down Expand Up @@ -540,7 +584,7 @@ class HTMLFastPathParser {
m_parsingBuffer.advance();
m_charBuffer.append(c);
}
if (m_parsingBuffer.atEnd() || !isCharAfterTagNameOrAttribute(*m_parsingBuffer))
if (UNLIKELY(m_parsingBuffer.atEnd() || !isCharAfterTagNameOrAttribute(*m_parsingBuffer)))
return didFail(HTMLFastPathResult::FailedParsingTagName, ElementName::Unknown);
skipWhile<isASCIIWhitespace>(m_parsingBuffer);
return findHTMLElementName(m_charBuffer.span());
Expand Down Expand Up @@ -596,22 +640,64 @@ class HTMLFastPathParser {
if (m_parsingBuffer.hasCharactersRemaining() && isQuoteCharacter(*m_parsingBuffer)) {
auto quoteChar = m_parsingBuffer.consume();
start = m_parsingBuffer.position();
for (; m_parsingBuffer.hasCharactersRemaining() && *m_parsingBuffer != quoteChar; m_parsingBuffer.advance()) {
if (*m_parsingBuffer == '&' || *m_parsingBuffer == '\r') {
m_parsingBuffer.setPosition(start - 1);
return scanEscapedAttributeValue();
auto* cursor = start;
const auto* end = start + m_parsingBuffer.lengthRemaining();
([&]() ALWAYS_INLINE_LAMBDA {
constexpr size_t stride = 16 / sizeof(CharacterType);
using UnsignedType = std::make_unsigned_t<CharacterType>;
if (static_cast<size_t>(end - cursor) >= stride) {
const auto quoteMask = SIMD::splat(static_cast<UnsignedType>(quoteChar));
const auto escapeMask = SIMD::splat(static_cast<UnsignedType>('&'));
const auto newlineMask = SIMD::splat(static_cast<UnsignedType>('\r'));

auto match = [&](auto* cursor) ALWAYS_INLINE_LAMBDA {
auto input = SIMD::load(bitwise_cast<const UnsignedType*>(cursor));
auto quotes = SIMD::equal(input, quoteMask);
auto escapes = SIMD::equal(input, escapeMask);
auto newlines = SIMD::equal(input, newlineMask);
auto mask = SIMD::merge(quotes, SIMD::merge(escapes, newlines));
return SIMD::findFirstNonZeroIndex(mask);
};

for (; cursor + (stride - 1) < end; cursor += stride) {
if (auto index = match(cursor)) {
cursor += index.value();
return;
}
}
if (cursor < end) {
if (auto index = match(end - stride)) {
cursor = end - stride + index.value();
return;
}
cursor = end;
}
return;
}
}
if (m_parsingBuffer.atEnd())

for (; cursor != end; ++cursor) {
auto character = *cursor;
if (character == quoteChar || character == '&' || character == '\r')
return;
}
}());

if (UNLIKELY(cursor == end))
return didFail(HTMLFastPathResult::FailedParsingQuotedAttributeValue, emptyAtom());

length = m_parsingBuffer.position() - start;
if (m_parsingBuffer.consume() != quoteChar)
length = cursor - start;
if (UNLIKELY(*cursor != quoteChar)) {
if (LIKELY(*cursor == '&' || *cursor == '\r')) {
m_parsingBuffer.setPosition(start - 1);
return scanEscapedAttributeValue();
}
return didFail(HTMLFastPathResult::FailedParsingQuotedAttributeValue, emptyAtom());
}
m_parsingBuffer.setPosition(cursor + 1);
} else {
skipWhile<isValidUnquotedAttributeValueChar>(m_parsingBuffer);
length = m_parsingBuffer.position() - start;
if (m_parsingBuffer.atEnd() || !isCharAfterUnquotedAttribute(*m_parsingBuffer))
if (UNLIKELY(m_parsingBuffer.atEnd() || !isCharAfterUnquotedAttribute(*m_parsingBuffer)))
return didFail(HTMLFastPathResult::FailedParsingUnquotedAttributeValue, emptyAtom());
}
return HTMLNameCache::makeAttributeValue({ start, length });
Expand Down Expand Up @@ -700,7 +786,7 @@ class HTMLFastPathParser {
// We assume that we found the closing tag. The tagName will be checked by the caller `parseContainerElement()`.
return;
}
if (++m_elementDepth == Settings::defaultMaximumHTMLParserDOMTreeDepth)
if (UNLIKELY(++m_elementDepth == Settings::defaultMaximumHTMLParserDOMTreeDepth))
return didFail(HTMLFastPathResult::FailedMaxDepth);
auto child = ParentTag::parseChild(parent, *this);
--m_elementDepth;
Expand All @@ -719,15 +805,15 @@ class HTMLFastPathParser {
while (true) {
auto attributeName = scanAttributeName();
if (attributeName == nullQName()) {
if (m_parsingBuffer.hasCharactersRemaining()) {
if (LIKELY(m_parsingBuffer.hasCharactersRemaining())) {
if (*m_parsingBuffer == '>') {
m_parsingBuffer.advance();
break;
}
if (*m_parsingBuffer == '/') {
m_parsingBuffer.advance();
skipWhile<isASCIIWhitespace>(m_parsingBuffer);
if (m_parsingBuffer.atEnd() || m_parsingBuffer.consume() != '>')
if (UNLIKELY(m_parsingBuffer.atEnd() || m_parsingBuffer.consume() != '>'))
return didFail(HTMLFastPathResult::FailedParsingAttributes);
break;
}
Expand Down Expand Up @@ -847,7 +933,7 @@ class HTMLFastPathParser {
parent.parserAppendChild(element);
element->beginParsingChildren();
parseChildren<Tag>(element);
if (parsingFailed() || m_parsingBuffer.atEnd())
if (UNLIKELY(parsingFailed() || m_parsingBuffer.atEnd()))
return didFail(HTMLFastPathResult::FailedEndOfInputReachedForContainer, element);

// parseChildren<Tag>(element) stops after the (hopefully) closing tag's `<`
Expand All @@ -856,12 +942,12 @@ class HTMLFastPathParser {
m_parsingBuffer.advance();

if (UNLIKELY(!skipCharactersExactly(m_parsingBuffer, Tag::tagNameCharacters))) {
if (!skipLettersExactlyIgnoringASCIICase(m_parsingBuffer, Tag::tagNameCharacters))
if (UNLIKELY(!skipLettersExactlyIgnoringASCIICase(m_parsingBuffer, Tag::tagNameCharacters)))
return didFail(HTMLFastPathResult::FailedEndTagNameMismatch, element);
}
skipWhile<isASCIIWhitespace>(m_parsingBuffer);

if (m_parsingBuffer.atEnd() || m_parsingBuffer.consume() != '>')
if (UNLIKELY(m_parsingBuffer.atEnd() || m_parsingBuffer.consume() != '>'))
return didFail(HTMLFastPathResult::FailedUnexpectedTagNameCloseState, element);

element->finishParsingChildren();
Expand Down

0 comments on commit e8401be

Please sign in to comment.