From 31005e5e9299e6a7957bb30a85458a77a719ddad Mon Sep 17 00:00:00 2001 From: Yusuke Suzuki Date: Fri, 10 May 2024 18:50:28 -0700 Subject: [PATCH] Use find8 and find16 in AdaptiveStringSearcher https://bugs.webkit.org/show_bug.cgi?id=274010 rdar://127894478 Reviewed by Mark Lam. We should just use find8 and find16 in AdaptiveStringSearcher. Since we have find16, we do not need to use the memchr hack here. * Source/WTF/wtf/text/AdaptiveStringSearcher.h: (WTF::AdaptiveStringSearcherBase::findFirstCharacter): (WTF::AdaptiveStringSearcherBase::alignDown): Deleted. (WTF::AdaptiveStringSearcherBase::getHighestValueByte): Deleted. Canonical link: https://commits.webkit.org/278644@main --- Source/WTF/wtf/text/AdaptiveStringSearcher.h | 55 ++++++-------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/Source/WTF/wtf/text/AdaptiveStringSearcher.h b/Source/WTF/wtf/text/AdaptiveStringSearcher.h index 243a684b55ed..5a6c44444b61 100644 --- a/Source/WTF/wtf/text/AdaptiveStringSearcher.h +++ b/Source/WTF/wtf/text/AdaptiveStringSearcher.h @@ -65,51 +65,30 @@ class AdaptiveStringSearcherBase { static constexpr bool exceedsOneByte(LChar) { return false; } static constexpr bool exceedsOneByte(UChar c) { return c > 0xff; } - template - static inline T alignDown(T value, U alignment) - { - return reinterpret_cast((reinterpret_cast(value) & ~(alignment - 1))); - } - - static constexpr uint8_t getHighestValueByte(LChar character) { return character; } - - static constexpr uint8_t getHighestValueByte(UChar character) - { - return std::max(static_cast(character & 0xFF), static_cast(character >> 8)); - } - template static inline int findFirstCharacter(std::span pattern, std::span subject, int index) { const auto* subjectPtr = subject.data(); - const PatternChar patternFirstChar = pattern[0]; - const int maxN = (subject.size() - pattern.size() + 1); + PatternChar patternFirstChar = pattern[0]; - if (sizeof(SubjectChar) == 2 && !patternFirstChar) { - // Special-case looking for 
the 0 char in other than one-byte strings. - // memchr mostly fails in this case due to every other byte being 0 in text - // that is mostly ascii characters. - for (int i = index; i < maxN; ++i) { - if (!subjectPtr[i]) - return i; - } - return -1; - } - const uint8_t searchByte = getHighestValueByte(patternFirstChar); - const SubjectChar searchChar = static_cast(patternFirstChar); - int pos = index; - do { - ASSERT(maxN - pos >= 0); - const SubjectChar* charPos = reinterpret_cast(memchr(subjectPtr + pos, searchByte, (maxN - pos) * sizeof(SubjectChar))); - if (charPos == nullptr) + if constexpr (sizeof(PatternChar) == 2 && sizeof(SubjectChar) == 1) { + if (!isLatin1(patternFirstChar)) return -1; - charPos = alignDown(charPos, sizeof(SubjectChar)); - pos = static_cast(charPos - subjectPtr); - if (subjectPtr[pos] == searchChar) - return pos; - } while (++pos < maxN); + } - return -1; + const int maxN = (subject.size() - pattern.size() + 1); + const SubjectChar searchCharacter = static_cast(patternFirstChar); + const auto* start = subjectPtr + index; + const auto searchLength = maxN - index; + const SubjectChar* charPos = nullptr; + ASSERT(maxN - index >= 0); + if constexpr (sizeof(SubjectChar) == 2) + charPos = bitwise_cast(find16(bitwise_cast(start), searchCharacter, searchLength)); + else + charPos = bitwise_cast(find8(bitwise_cast(start), searchCharacter, searchLength)); + if (charPos == nullptr) + return -1; + return static_cast(charPos - subjectPtr); } };