Skip to content

Commit

Permalink
Add SIMD containsHTMLLineBreak
Browse files Browse the repository at this point in the history
https://bugs.webkit.org/show_bug.cgi?id=271878
rdar://125595924

Reviewed by Mark Lam.

This patch adds the WTF::charactersContain SIMD function, which scans an entire string to check whether it includes any of the specified characters.
This function is aligned with charactersAreAllASCII. It is intended for inputs that rarely
include the specified characters, so it scans the entire string very quickly with SIMD and then returns the answer.

* Source/WTF/wtf/text/StringCommon.h:
* Source/WebCore/html/TextFieldInputType.cpp:
(WebCore::limitLength):
(WebCore::TextFieldInputType::sanitizeValue const):
* Source/WebCore/html/parser/HTMLParserIdioms.h:
(WebCore::containsHTMLLineBreak):
* Tools/TestWebKitAPI/Tests/WTF/StringCommon.cpp:
(TestWebKitAPI::TEST(WTF_StringCommon, CharactersContain8)):
(TestWebKitAPI::TEST(WTF_StringCommon, CharactersContain16)):

Canonical link: https://commits.webkit.org/276842@main
  • Loading branch information
Constellation committed Mar 29, 2024
1 parent 2349fc0 commit 4a75c60
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 2 deletions.
92 changes: 92 additions & 0 deletions Source/WTF/wtf/text/StringCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -1163,10 +1163,102 @@ inline std::span<const char> span(const char* string)
return { string, string ? strlen(string) : 0 };
}

#if CPU(ARM64)

// Loads one NEON vector of sixteen 8-bit lanes from memory starting at ptr.
// The load reads 16 bytes, so the caller must pass a pointer with that much readable data.
ALWAYS_INLINE uint8x16_t loadBulk(const uint8_t* ptr)
{
return vld1q_u8(ptr);
}

// Loads one NEON vector of eight 16-bit lanes from memory starting at ptr.
// The load reads 16 bytes, so the caller must pass a pointer with that much readable data.
ALWAYS_INLINE uint16x8_t loadBulk(const uint16_t* ptr)
{
return vld1q_u16(ptr);
}

// Combines two 8-bit-lane vectors with a bitwise OR, so a bit set in either operand
// stays set in the result.
ALWAYS_INLINE uint8x16_t mergeBulk(uint8x16_t accumulated, uint8x16_t input)
{
return vorrq_u8(accumulated, input);
}

// Combines two 16-bit-lane vectors with a bitwise OR, so a bit set in either operand
// stays set in the result.
ALWAYS_INLINE uint16x8_t mergeBulk(uint16x8_t accumulated, uint16x8_t input)
{
return vorrq_u16(accumulated, input);
}

// Returns true when at least one of the sixteen 8-bit lanes is non-zero.
// The horizontal maximum across all lanes is zero only if every lane is zero.
ALWAYS_INLINE bool isNonZeroBulk(uint8x16_t accumulated)
{
    return vmaxvq_u8(accumulated) != 0;
}

// Returns true when at least one of the eight 16-bit lanes is non-zero.
// The horizontal maximum across all lanes is zero only if every lane is zero.
ALWAYS_INLINE bool isNonZeroBulk(uint16x8_t accumulated)
{
    return vmaxvq_u16(accumulated) != 0;
}

// Compares every 8-bit lane of input against each template character.
// A lane of the result is non-zero when it equals at least one of them; the
// per-character comparison results are OR-ed together via mergeBulk.
template<LChar character, LChar... characters>
ALWAYS_INLINE uint8x16_t compareBulk(uint8x16_t input)
{
    // Broadcast the first character to all lanes, then compare lane-wise.
    const uint8x16_t needle = vmovq_n_u8(character);
    const uint8x16_t matches = vceqq_u8(input, needle);
    if constexpr (sizeof...(characters) > 0)
        return mergeBulk(matches, compareBulk<characters...>(input));
    else
        return matches;
}

// Compares every 16-bit lane of input against each template character.
// A lane of the result is non-zero when it equals at least one of them; the
// per-character comparison results are OR-ed together via mergeBulk.
template<UChar character, UChar... characters>
ALWAYS_INLINE uint16x8_t compareBulk(uint16x8_t input)
{
    // Broadcast the first character to all lanes, then compare lane-wise.
    const uint16x8_t needle = vmovq_n_u16(character);
    const uint16x8_t matches = vceqq_u16(input, needle);
    if constexpr (sizeof...(characters) > 0)
        return mergeBulk(matches, compareBulk<characters...>(input));
    else
        return matches;
}

#endif

// Scalar counterpart of compareBulk: returns true when input equals any of the
// template characters.
template<typename CharacterType, CharacterType... characters>
ALWAYS_INLINE bool compareEach(CharacterType input)
{
    // Bitwise | rather than || is deliberate: it avoids short-circuit evaluation to reduce branches.
    return ((input == characters) | ...);
}

// Returns true when span contains at least one of the template characters.
//
// On ARM64, inputs at least one 16-byte vector long are scanned entirely with
// SIMD: match results are accumulated across the whole input and inspected once
// at the end, without an early exit. Shorter inputs (and all inputs on other
// CPUs) use a scalar loop that returns at the first match.
template<typename CharacterType, CharacterType... characters>
ALWAYS_INLINE bool charactersContain(std::span<const CharacterType> span)
{
    const auto* cursor = span.data();
    const size_t count = span.size();

#if CPU(ARM64)
    using UnsignedType = std::make_unsigned_t<CharacterType>;
    using BulkType = decltype(loadBulk(static_cast<const UnsignedType*>(nullptr)));
    constexpr size_t lanesPerVector = 16 / sizeof(CharacterType);

    if (count >= lanesPerVector) {
        BulkType matches { };
        size_t offset = 0;

        // Consume every full vector that fits in the input.
        while (offset + (lanesPerVector - 1) < count) {
            matches = mergeBulk(matches, compareBulk<characters...>(loadBulk(bitwise_cast<const UnsignedType*>(cursor + offset))));
            offset += lanesPerVector;
        }

        // Leftover characters are covered by one extra vector ending exactly at the
        // end of the input. It overlaps already-scanned characters, which is harmless
        // because the results are only OR-ed together.
        if (offset < count)
            matches = mergeBulk(matches, compareBulk<characters...>(loadBulk(bitwise_cast<const UnsignedType*>(cursor + count - lanesPerVector))));

        return isNonZeroBulk(matches);
    }
#endif

    for (size_t i = 0; i < count; ++i) {
        if (compareEach<CharacterType, characters...>(cursor[i]))
            return true;
    }
    return false;
}

}

using WTF::equalIgnoringASCIICase;
using WTF::equalLettersIgnoringASCIICase;
using WTF::isLatin1;
using WTF::span;
using WTF::span8;
using WTF::charactersContain;
8 changes: 6 additions & 2 deletions Source/WebCore/html/TextFieldInputType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,9 +500,10 @@ void TextFieldInputType::createDataListDropdownIndicator()

static String limitLength(const String& string, unsigned maxLength)
{
unsigned newLength = std::min(maxLength, string.length());
if (newLength == string.length())
if (LIKELY(string.length() <= maxLength))
return string;

unsigned newLength = maxLength;
if (newLength > 0 && U16_IS_LEAD(string[newLength - 1]))
--newLength;
return string.left(newLength);
Expand Down Expand Up @@ -585,6 +586,9 @@ static bool isAutoFillButtonTypeChanged(const AtomString& attribute, AutoFillBut

String TextFieldInputType::sanitizeValue(const String& proposedValue) const
{
if (LIKELY(!containsHTMLLineBreak(proposedValue)))
return limitLength(proposedValue, HTMLInputElement::maxEffectiveLength);

// Passing a lambda instead of a function name helps the compiler inline isHTMLLineBreak.
auto proposedValueWithoutLineBreaks = proposedValue.removeCharacters([](auto character) {
return isHTMLLineBreak(character);
Expand Down
7 changes: 7 additions & 0 deletions Source/WebCore/html/parser/HTMLParserIdioms.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ inline bool isHTMLLineBreak(UChar character)
return character <= '\r' && (character == '\n' || character == '\r');
}

// Returns true when view contains a carriage return or a line feed, dispatching
// to the 8-bit or 16-bit charactersContain scan depending on how the view stores its characters.
ALWAYS_INLINE bool containsHTMLLineBreak(StringView view)
{
    if (!view.is8Bit())
        return charactersContain<UChar, '\r', '\n'>(view.span16());
    return charactersContain<LChar, '\r', '\n'>(view.span8());
}

template<typename CharacterType> inline bool isComma(CharacterType character)
{
return character == ',';
Expand Down
108 changes: 108 additions & 0 deletions Tools/TestWebKitAPI/Tests/WTF/StringCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,112 @@ TEST(WTF_StringCommon, CopyElements32To16)
EXPECT_EQ(destination[4096 + 4 + i], static_cast<uint16_t>(i));
}

// Exercises charactersContain over 8-bit (LChar) input: empty input, an input shorter
// than 16 characters, inputs whose length sits right at or next to a multiple of 16
// (the ARM64 bulk stride for LChar), and a longer input.
TEST(WTF_StringCommon, CharactersContain8)
{
    {
        // Empty input never contains anything.
        Vector<LChar> source;
        EXPECT_FALSE((charactersContain<LChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0, 1>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0, 1, 2>(source.span())));
    }

    {
        // 15 characters (values 0..14): shorter than one bulk stride.
        Vector<LChar> source;
        for (unsigned i = 0; i < 15; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<LChar, 0>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 2>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 2, 3>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 16, 14>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 16>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 16, 15>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 16, 15, 17>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 16, 15, 17, 18>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0x81>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0x81, 0x82>(source.span())));
    }

    {
        // Stride boundaries: exactly one stride (16), one stride plus a single trailing
        // character (17), and exactly two strides (32). Values are 1..length, so the
        // largest value is always the last character.
        Vector<LChar> source;
        for (unsigned i = 1; i <= 16; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<LChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 16>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 17>(source.span())));

        for (unsigned i = 17; i <= 17; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<LChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 17>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 18>(source.span())));

        for (unsigned i = 18; i <= 32; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<LChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 32>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 33>(source.span())));
    }

    {
        // 125 characters: the odd values 1..249.
        Vector<LChar> source;
        for (unsigned i = 0; i < 250; ++i) {
            if (i & 0x1)
                source.append(i);
        }
        EXPECT_FALSE((charactersContain<LChar, 0>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 0, 249>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 0xff>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 0x81>(source.span())));
        EXPECT_FALSE((charactersContain<LChar, 250>(source.span())));
        EXPECT_TRUE((charactersContain<LChar, 249>(source.span())));
    }
}

// Exercises charactersContain over 16-bit (UChar) input: empty input, a short input,
// inputs whose length sits right at or next to a multiple of 8 (the ARM64 bulk stride
// for UChar), and longer inputs with values both below and above 0xFF.
TEST(WTF_StringCommon, CharactersContain16)
{
    {
        // Empty input never contains anything.
        Vector<UChar> source;
        EXPECT_FALSE((charactersContain<UChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0, 1>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0, 1, 2>(source.span())));
    }

    {
        // 15 characters (values 0..14).
        Vector<UChar> source;
        for (unsigned i = 0; i < 15; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<UChar, 0>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 2>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 2, 3>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 16, 14>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 16>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 16, 15>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 16, 15, 17>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 16, 15, 17, 18>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x81>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x81, 0x82>(source.span())));
    }

    {
        // Stride boundaries: exactly one stride (8), one stride plus a single trailing
        // character (9), and exactly two strides (16). Values are 1..length, so the
        // largest value is always the last character.
        Vector<UChar> source;
        for (unsigned i = 1; i <= 8; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<UChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 8>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 9>(source.span())));

        for (unsigned i = 9; i <= 9; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<UChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 9>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 10>(source.span())));

        for (unsigned i = 10; i <= 16; ++i)
            source.append(i);
        EXPECT_TRUE((charactersContain<UChar, 1>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 16>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 17>(source.span())));
    }

    {
        // 125 characters: the odd values 1..249.
        Vector<UChar> source;
        for (unsigned i = 0; i < 250; ++i) {
            if (i & 0x1)
                source.append(i);
        }
        EXPECT_FALSE((charactersContain<UChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0xff>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 0x81>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 250>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 249>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 0, 249>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x101>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1001>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1001, 0x1001>(source.span())));
    }

    {
        // 125 characters: the odd values 0x1001..0x10F9, i.e. every character is above 0xFF.
        Vector<UChar> source;
        for (unsigned i = 0; i < 250; ++i) {
            if (i & 0x1)
                source.append(i + 0x1000);
        }
        EXPECT_FALSE((charactersContain<UChar, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0, 0xff>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0xff>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x81>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 250>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 249>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x101>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 0x1001>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1000>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1100>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1000 + 256>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1000 + 250>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 0x1000 + 249>(source.span())));
        EXPECT_TRUE((charactersContain<UChar, 0x1000 + 249, 0>(source.span())));
        EXPECT_FALSE((charactersContain<UChar, 0x1000 + 250, 0>(source.span())));
    }
}

} // namespace

0 comments on commit 4a75c60

Please sign in to comment.