Skip to content
Permalink
Browse files
[JSC] Add BoyerMooreHorspool search to DFG / FTL
https://bugs.webkit.org/show_bug.cgi?id=244924

Reviewed by Alexey Shvayka.

In DFG / FTL, we can often determine at compile time which string is used as the search pattern in String#replace.
So, in the compiler, we can construct a preprocessed table to make String#replace faster.

In this patch, we deploy Boyer-Moore-Horspool (BMH) search[1], which allows large skips on mismatches during substring search,
accelerating the matching significantly. We use BMH since it offers a good memory-performance tradeoff.

Currently, we deploy it only for patterns whose length is smaller than or equal to 256, for table size efficiency.
We could extend it to a 16-bit offset table or a 32-bit offset table, but we consider that future work.

In microbenchmarks, we observed a 43% improvement. We also found that this is a 0.2-0.3% Speedometer2.1 improvement;
in particular, the Vanilla families get a 1-2% improvement with high confidence.

                                         ToT                     Patched

    string-replace-benchmark      121.2814+-0.6856     ^     84.5948+-0.6012        ^ definitely 1.4337x faster

[1]: https://en.wikipedia.org/wiki/Boyer–Moore–Horspool_algorithm

* JSTests/microbenchmarks/string-replace-benchmark.js: Added.
(fill):
(test.template.li.data.id.string_appeared_here.div.input):
(test.template.li.data.id.string_appeared_here.div):
(test.label.button):
* Source/JavaScriptCore/dfg/DFGCommonData.h:
* Source/JavaScriptCore/dfg/DFGGraph.h:
* Source/JavaScriptCore/dfg/DFGJITCompiler.cpp:
(JSC::DFG::JITCompiler::link):
* Source/JavaScriptCore/dfg/DFGOperations.cpp:
(JSC::DFG::stringReplaceStringString):
(JSC::DFG::JSC_DEFINE_JIT_OPERATION):
* Source/JavaScriptCore/dfg/DFGOperations.h:
* Source/JavaScriptCore/dfg/DFGSpeculativeJIT.cpp:
* Source/JavaScriptCore/ftl/FTLLink.cpp:
(JSC::FTL::link):
* Source/JavaScriptCore/ftl/FTLLowerDFGToB3.cpp:
(JSC::FTL::DFG::LowerDFGToB3::compileCompareStrictEq):
* Source/WTF/WTF.xcodeproj/project.pbxproj:
* Source/WTF/wtf/CMakeLists.txt:
* Source/WTF/wtf/text/ASCIILiteral.h:
(WTF::ASCIILiteral::length const):
* Source/WTF/wtf/text/StringSearch.h: Added.
(WTF::BoyerMooreHorspoolTable::BoyerMooreHorspoolTable):
(WTF::BoyerMooreHorspoolTable::find const):
(WTF::BoyerMooreHorspoolTable::findInner const):
* Tools/TestWebKitAPI/CMakeLists.txt:
* Tools/TestWebKitAPI/TestWebKitAPI.xcodeproj/project.pbxproj:
* Tools/TestWebKitAPI/Tests/WTF/StringSearch.cpp: Added.
(TestWebKitAPI::TEST):

Canonical link: https://commits.webkit.org/254342@main
  • Loading branch information
Constellation committed Sep 10, 2022
1 parent 533f50c commit d6426c81236a2be90f2454d85ebf155d2e5beb4e
Show file tree
Hide file tree
Showing 17 changed files with 367 additions and 18 deletions.
@@ -0,0 +1,19 @@
// Fills a template by replacing the first occurrence of each of the
// "{{title}}", "{{completed}}" and "{{checked}}" placeholders, in that order,
// with the corresponding argument. Every String#replace call here uses a
// constant string as its search pattern.
function fill(template, title, completed, checked)
{
return template.replace("{{title}}", title).replace("{{completed}}", completed).replace("{{checked}}", checked);
}
// noInline is not defined in this file; presumably a test-shell helper that
// prevents fill from being inlined into its caller -- TODO confirm.
noInline(fill);

// Benchmark workload: for each i in [55, 100), builds i list-item templates
// and fills each one through fill(). The return value of fill() is discarded.
function test()
{
for (var i = 55; i < 100; ++i) {
for (var j = 0; j < i; ++j) {
// A new template string is created on every iteration because the data-id
// attribute embeds j + 1.
var template = `<li data-id="${j + 1}" class="{{completed}}"><div class="view"><input class="toggle" type="checkbox" {{checked}}><label>{{title}}</label><button class="destroy"></button></div></li>`;
// "completed" and "checked" are replaced with the empty string.
fill(template, `Something to do ${j}`, "", "");
}
}
}
// noInline is not defined in this file; presumably a test-shell helper that
// prevents test from being inlined -- TODO confirm.
noInline(test);

// Run the whole workload 100 times.
for (var i = 0; i < 100; ++i)
test();
@@ -27,7 +27,7 @@ CODE_SIGN_IDENTITY = -;

ALWAYS_SEARCH_USER_PATHS = NO;

CLANG_CXX_LANGUAGE_STANDARD = gnu++1z;
CLANG_CXX_LANGUAGE_STANDARD = gnu++2a;
CLANG_CXX_LIBRARY = libc++;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
@@ -45,6 +45,7 @@
#include "YarrJIT.h"
#include <wtf/Bag.h>
#include <wtf/Noncopyable.h>
#include <wtf/text/StringSearch.h>

namespace JSC {

@@ -133,6 +134,7 @@ class CommonData : public MathICHolder {
std::unique_ptr<PCToCodeOriginMap> m_pcToCodeOriginMap;
RecordedStatuses recordedStatuses;
FixedVector<JumpReplacement> m_jumpReplacements;
FixedVector<std::unique_ptr<BoyerMooreHorspoolTable<uint8_t>>> m_stringSearchTable8;
Bag<StructureStubInfo> m_stubInfos;
Bag<OptimizingCallLinkInfo> m_callLinkInfos;
Yarr::YarrBoyerMooreData m_boyerMooreData;
@@ -1105,6 +1105,18 @@ class Graph final : public virtual Scannable {

void freeDFGIRAfterLowering();

// Returns the Boyer-Moore-Horspool table registered for |string| in
// m_stringSearchTable8, creating it on first request. Returns nullptr when the
// pattern is shorter than minPatternLength or longer than the table's
// maxPatternLength; no table is created in that case.
const BoyerMooreHorspoolTable<uint8_t>* tryAddStringSearchTable8(const String& string)
{
    using Table8 = BoyerMooreHorspoolTable<uint8_t>;
    constexpr unsigned minPatternLength = 9;

    const unsigned patternLength = string.length();
    if (patternLength < minPatternLength || patternLength > Table8::maxPatternLength)
        return nullptr;

    // ensure() only runs the factory when |string| has no entry yet, so one
    // table is kept per distinct pattern.
    auto addResult = m_stringSearchTable8.ensure(string, [&string] {
        return makeUnique<Table8>(string);
    });
    return addResult.iterator->value.get();
}

StackCheck m_stackChecker;
VM& m_vm;
Plan& m_plan;
@@ -1120,6 +1132,7 @@ class Graph final : public virtual Scannable {
Vector<SimpleJumpTable> m_switchJumpTables;
Vector<const UnlinkedStringJumpTable*> m_unlinkedStringSwitchJumpTables;
Vector<StringJumpTable> m_stringSwitchJumpTables;
HashMap<String, std::unique_ptr<BoyerMooreHorspoolTable<uint8_t>>> m_stringSearchTable8;

HashMap<EncodedJSValue, FrozenValue*, EncodedJSValueHash, EncodedJSValueHashTraits> m_frozenValueMap;
Bag<FrozenValue> m_frozenValues;
@@ -191,6 +191,13 @@ void JITCompiler::link(LinkBuffer& linkBuffer)

if (!m_graph.m_plan.inlineCallFrames()->isEmpty())
m_jitCode->common.inlineCallFrames = m_graph.m_plan.inlineCallFrames();
if (!m_graph.m_stringSearchTable8.isEmpty()) {
FixedVector<std::unique_ptr<BoyerMooreHorspoolTable<uint8_t>>> tables(m_graph.m_stringSearchTable8.size());
unsigned index = 0;
for (auto& entry : m_graph.m_stringSearchTable8)
tables[index++] = WTFMove(entry.value);
m_jitCode->common.m_stringSearchTable8 = WTFMove(tables);
}

#if USE(JSVALUE32_64)
m_jitCode->common.doubleConstants = WTFMove(m_graph.m_doubleConstants);
@@ -2506,15 +2506,19 @@ JSC_DEFINE_JIT_OPERATION(operationStringValueOf, JSString*, (JSGlobalObject* glo
return nullptr;
}


enum class StringReplaceSubstitutions : bool { Yes, No };
template<StringReplaceSubstitutions substitutions>
static ALWAYS_INLINE JSString* stringReplaceStringString(JSGlobalObject* globalObject, JSString* stringCell, String string, String search, String replacement)
enum class StringReplaceUseTable : bool { Yes, No };
template<StringReplaceSubstitutions substitutions, StringReplaceUseTable useTable, typename TableType>
static ALWAYS_INLINE JSString* stringReplaceStringString(JSGlobalObject* globalObject, JSString* stringCell, String string, String search, String replacement, const TableType* table)
{
VM& vm = globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);

size_t matchStart = string.find(search);
size_t matchStart;
if constexpr (useTable == StringReplaceUseTable::Yes)
matchStart = table->find(string, search);
else
matchStart = string.find(search);
if (matchStart == notFound)
return stringCell;

@@ -2559,7 +2563,7 @@ JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringString, JSString*, (JSGloba
String replacement = replacementCell->value(globalObject);
RETURN_IF_EXCEPTION(scope, nullptr);

RELEASE_AND_RETURN(scope, stringReplaceStringString<StringReplaceSubstitutions::Yes>(globalObject, stringCell, WTFMove(string), WTFMove(search), WTFMove(replacement)));
RELEASE_AND_RETURN(scope, (stringReplaceStringString<StringReplaceSubstitutions::Yes, StringReplaceUseTable::No, BoyerMooreHorspoolTable<uint8_t>>(globalObject, stringCell, WTFMove(string), WTFMove(search), WTFMove(replacement), nullptr)));
}

JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringStringWithoutSubstitution, JSString*, (JSGlobalObject* globalObject, JSString* stringCell, JSString* searchCell, JSString* replacementCell))
@@ -2578,7 +2582,7 @@ JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringStringWithoutSubstitution,
String replacement = replacementCell->value(globalObject);
RETURN_IF_EXCEPTION(scope, nullptr);

RELEASE_AND_RETURN(scope, stringReplaceStringString<StringReplaceSubstitutions::No>(globalObject, stringCell, WTFMove(string), WTFMove(search), WTFMove(replacement)));
RELEASE_AND_RETURN(scope, (stringReplaceStringString<StringReplaceSubstitutions::No, StringReplaceUseTable::No, BoyerMooreHorspoolTable<uint8_t>>(globalObject, stringCell, WTFMove(string), WTFMove(search), WTFMove(replacement), nullptr)));
}

JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringEmptyString, JSString*, (JSGlobalObject* globalObject, JSString* stringCell, JSString* searchCell))
@@ -2610,6 +2614,73 @@ JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringEmptyString, JSString*, (JS
return jsString(vm, WTFMove(result));
}

// JIT operation for String#replace(string, string) that performs the search
// through |table| and processes substitution patterns in the replacement
// (StringReplaceSubstitutions::Yes). Returns nullptr if resolving any of the
// three JSString cells raises an exception.
// NOTE(review): |table| is presumably the table the compiler built for the
// constant search string -- confirm against the call sites.
JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringStringWithTable8, JSString*, (JSGlobalObject* globalObject, JSString* stringCell, JSString* searchCell, JSString* replacementCell, const BoyerMooreHorspoolTable<uint8_t>* table))
{
    VM& vm = globalObject->vm();
    CallFrame* callFrame = DECLARE_CALL_FRAME(vm);
    JITOperationPrologueCallFrameTracer tracer(vm, callFrame);
    auto scope = DECLARE_THROW_SCOPE(vm);

    // Resolve each cell to its String value, bailing out as soon as one of
    // the resolutions leaves an exception pending.
    String subject = stringCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    String pattern = searchCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    String substitute = replacementCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    constexpr auto substitutionMode = StringReplaceSubstitutions::Yes;
    constexpr auto tableMode = StringReplaceUseTable::Yes;
    RELEASE_AND_RETURN(scope, (stringReplaceStringString<substitutionMode, tableMode>(globalObject, stringCell, WTFMove(subject), WTFMove(pattern), WTFMove(substitute), table)));
}

// JIT operation for String#replace(string, string) that performs the search
// through |table| and skips substitution-pattern processing
// (StringReplaceSubstitutions::No). Returns nullptr if resolving any of the
// three JSString cells raises an exception.
// NOTE(review): |table| is presumably the table the compiler built for the
// constant search string -- confirm against the call sites.
JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringStringWithoutSubstitutionWithTable8, JSString*, (JSGlobalObject* globalObject, JSString* stringCell, JSString* searchCell, JSString* replacementCell, const BoyerMooreHorspoolTable<uint8_t>* table))
{
    VM& vm = globalObject->vm();
    CallFrame* callFrame = DECLARE_CALL_FRAME(vm);
    JITOperationPrologueCallFrameTracer tracer(vm, callFrame);
    auto scope = DECLARE_THROW_SCOPE(vm);

    // Resolve each cell to its String value, bailing out as soon as one of
    // the resolutions leaves an exception pending.
    String subject = stringCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    String pattern = searchCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    String substitute = replacementCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    constexpr auto substitutionMode = StringReplaceSubstitutions::No;
    constexpr auto tableMode = StringReplaceUseTable::Yes;
    RELEASE_AND_RETURN(scope, (stringReplaceStringString<substitutionMode, tableMode>(globalObject, stringCell, WTFMove(subject), WTFMove(pattern), WTFMove(substitute), table)));
}

// JIT operation for String#replace(string, "") that locates the match through
// |table|. Returns the original |stringCell| when there is no match, a new
// string with the first match removed otherwise, and nullptr when an
// exception is pending or the result cannot be allocated (an out-of-memory
// error is thrown in that case).
JSC_DEFINE_JIT_OPERATION(operationStringReplaceStringEmptyStringWithTable8, JSString*, (JSGlobalObject* globalObject, JSString* stringCell, JSString* searchCell, const BoyerMooreHorspoolTable<uint8_t>* table))
{
    VM& vm = globalObject->vm();
    CallFrame* callFrame = DECLARE_CALL_FRAME(vm);
    JITOperationPrologueCallFrameTracer tracer(vm, callFrame);
    auto scope = DECLARE_THROW_SCOPE(vm);

    String subject = stringCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    String pattern = searchCell->value(globalObject);
    RETURN_IF_EXCEPTION(scope, nullptr);

    const size_t matchStart = table->find(subject, pattern);
    if (matchStart == notFound)
        return stringCell;

    // The replacement is the empty string, so it cannot contain
    // backreferences: the result is simply the text before the match joined
    // with the text after it.
    const size_t matchEnd = matchStart + pattern.length();
    StringView subjectView(subject);
    auto beforeMatch = subjectView.substring(0, matchStart);
    auto afterMatch = subjectView.substring(matchEnd, subject.length() - matchEnd);

    auto result = tryMakeString(beforeMatch, afterMatch);
    if (UNLIKELY(!result)) {
        throwOutOfMemoryError(globalObject, scope);
        return nullptr;
    }

    return jsString(vm, WTFMove(result));
}

JSC_DEFINE_JIT_OPERATION(operationStringSubstr, JSCell*, (JSGlobalObject* globalObject, JSCell* cell, int32_t from, int32_t span))
{
VM& vm = globalObject->vm();
@@ -30,6 +30,7 @@
#include "DFGArithMode.h"
#include "JITOperations.h"
#include "TypedArrayType.h"
#include <wtf/text/StringSearch.h>

namespace JSC {

@@ -245,6 +246,9 @@ JSC_DECLARE_JIT_OPERATION(operationStringValueOf, JSString*, (JSGlobalObject*, E
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringString, JSString*, (JSGlobalObject*, JSString*, JSString*, JSString*));
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringStringWithoutSubstitution, JSString*, (JSGlobalObject*, JSString*, JSString*, JSString*));
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringEmptyString, JSString*, (JSGlobalObject*, JSString*, JSString*));
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringStringWithTable8, JSString*, (JSGlobalObject*, JSString*, JSString*, JSString*, const BoyerMooreHorspoolTable<uint8_t>*));
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringStringWithoutSubstitutionWithTable8, JSString*, (JSGlobalObject*, JSString*, JSString*, JSString*, const BoyerMooreHorspoolTable<uint8_t>*));
JSC_DECLARE_JIT_OPERATION(operationStringReplaceStringEmptyStringWithTable8, JSString*, (JSGlobalObject*, JSString*, JSString*, const BoyerMooreHorspoolTable<uint8_t>*));
JSC_DECLARE_JIT_OPERATION(operationToLowerCase, JSString*, (JSGlobalObject*, JSString*, uint32_t));

JSC_DECLARE_JIT_OPERATION(operationInt32ToString, char*, (JSGlobalObject*, int32_t, int32_t));
@@ -13319,9 +13319,14 @@ void SpeculativeJIT::compileStringReplace(Node* node)
&& node->child1().useKind() == StringUse
&& node->child2().useKind() == StringUse
&& node->child3().useKind() == StringUse) {
String replacement = node->child3()->tryGetString(m_graph);
if (!!replacement) {
if (!replacement.length()) {
const BoyerMooreHorspoolTable<uint8_t>* tablePointer = nullptr;
String searchString = node->child2()->tryGetString(m_graph);
if (!!searchString)
tablePointer = m_graph.tryAddStringSearchTable8(searchString);

String replacementString = node->child3()->tryGetString(m_graph);
if (!!replacementString) {
if (!replacementString.length()) {
SpeculateCellOperand string(this, node->child1());
SpeculateCellOperand search(this, node->child2());
GPRReg stringGPR = string.gpr();
@@ -13331,13 +13336,16 @@ void SpeculativeJIT::compileStringReplace(Node* node)

flushRegisters();
GPRFlushedCallResult result(this);
callOperation(operationStringReplaceStringEmptyString, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR);
if (tablePointer)
callOperation(operationStringReplaceStringEmptyStringWithTable8, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, TrustedImmPtr(tablePointer));
else
callOperation(operationStringReplaceStringEmptyString, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR);
m_jit.exceptionCheck();
cellResult(result.gpr(), node);
return;
}

if (replacement.find('$') == notFound) {
if (replacementString.find('$') == notFound) {
SpeculateCellOperand string(this, node->child1());
SpeculateCellOperand search(this, node->child2());
SpeculateCellOperand replace(this, node->child3());
@@ -13350,7 +13358,10 @@ void SpeculativeJIT::compileStringReplace(Node* node)

flushRegisters();
GPRFlushedCallResult result(this);
callOperation(operationStringReplaceStringStringWithoutSubstitution, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR);
if (tablePointer)
callOperation(operationStringReplaceStringStringWithoutSubstitutionWithTable8, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR, TrustedImmPtr(tablePointer));
else
callOperation(operationStringReplaceStringStringWithoutSubstitution, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR);
m_jit.exceptionCheck();
cellResult(result.gpr(), node);
return;
@@ -13369,7 +13380,10 @@ void SpeculativeJIT::compileStringReplace(Node* node)

flushRegisters();
GPRFlushedCallResult result(this);
callOperation(operationStringReplaceStringString, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR);
if (tablePointer)
callOperation(operationStringReplaceStringStringWithTable8, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR, TrustedImmPtr(tablePointer));
else
callOperation(operationStringReplaceStringString, result.gpr(), JITCompiler::LinkableConstant(m_jit, m_graph.globalObjectFor(node->origin.semantic)), stringGPR, searchGPR, replaceGPR);
m_jit.exceptionCheck();
cellResult(result.gpr(), node);
return;
@@ -49,6 +49,13 @@ void link(State& state)

if (!graph.m_plan.inlineCallFrames()->isEmpty())
state.jitCode->common.inlineCallFrames = graph.m_plan.inlineCallFrames();
if (!graph.m_stringSearchTable8.isEmpty()) {
FixedVector<std::unique_ptr<BoyerMooreHorspoolTable<uint8_t>>> tables(graph.m_stringSearchTable8.size());
unsigned index = 0;
for (auto& entry : graph.m_stringSearchTable8)
tables[index++] = WTFMove(entry.value);
state.jitCode->common.m_stringSearchTable8 = WTFMove(tables);
}

graph.registerFrozenValues();

@@ -14742,28 +14742,42 @@ IGNORE_CLANG_WARNINGS_END
&& m_node->child1().useKind() == StringUse
&& m_node->child2().useKind() == StringUse
&& m_node->child3().useKind() == StringUse) {
const BoyerMooreHorspoolTable<uint8_t>* tablePointer = nullptr;
String searchString = m_node->child2()->tryGetString(m_graph);
if (!!searchString)
tablePointer = m_graph.tryAddStringSearchTable8(searchString);

String replacement = m_node->child3()->tryGetString(m_graph);
if (!!replacement) {
if (!replacement.length()) {
LValue string = lowString(m_node->child1());
LValue search = lowString(m_node->child2());
setJSValue(vmCall(pointerType(), operationStringReplaceStringEmptyString, weakPointer(globalObject), string, search));
if (tablePointer)
setJSValue(vmCall(pointerType(), operationStringReplaceStringEmptyStringWithTable8, weakPointer(globalObject), string, search, m_out.constIntPtr(tablePointer)));
else
setJSValue(vmCall(pointerType(), operationStringReplaceStringEmptyString, weakPointer(globalObject), string, search));
return;
}

if (replacement.find('$') == notFound) {
LValue string = lowString(m_node->child1());
LValue search = lowString(m_node->child2());
LValue replace = lowString(m_node->child3());
setJSValue(vmCall(pointerType(), operationStringReplaceStringStringWithoutSubstitution, weakPointer(globalObject), string, search, replace));
if (tablePointer)
setJSValue(vmCall(pointerType(), operationStringReplaceStringStringWithoutSubstitutionWithTable8, weakPointer(globalObject), string, search, replace, m_out.constIntPtr(tablePointer)));
else
setJSValue(vmCall(pointerType(), operationStringReplaceStringStringWithoutSubstitution, weakPointer(globalObject), string, search, replace));
return;
}
}

LValue string = lowString(m_node->child1());
LValue search = lowString(m_node->child2());
LValue replace = lowString(m_node->child3());
setJSValue(vmCall(pointerType(), operationStringReplaceStringString, weakPointer(globalObject), string, search, replace));
if (tablePointer)
setJSValue(vmCall(pointerType(), operationStringReplaceStringStringWithTable8, weakPointer(globalObject), string, search, replace, m_out.constIntPtr(tablePointer)));
else
setJSValue(vmCall(pointerType(), operationStringReplaceStringString, weakPointer(globalObject), string, search, replace));
return;
}

0 comments on commit d6426c8

Please sign in to comment.