Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[JSC] Add StringSplitCache
https://bugs.webkit.org/show_bug.cgi?id=257530
rdar://problem/110053165

Reviewed by Justin Michaud.

This patch adds a String#split cache, inspired by V8's string split cache.
It is common in real-world code to split a literal string with a literal
separator for readability, and to do so repeatedly rather than once at global scope.
Instead of splitting the same string and regenerating the same substrings again and again, we can safely cache the result.
This patch caches the result as a JSImmutableButterfly when both the subject and the separator are atom strings.
In addition, instead of generating substrings and extending the butterfly as it goes, the String#split operation
now only computes the offsets of the substrings first, and then generates the JSImmutableButterfly / JSArray at the end
by creating each substring from those offsets. This avoids wasteful intermediate butterfly allocations.

* Source/JavaScriptCore/CMakeLists.txt:
* Source/JavaScriptCore/JavaScriptCore.xcodeproj/project.pbxproj:
* Source/JavaScriptCore/heap/Heap.cpp:
(JSC::Heap::finalize):
* Source/JavaScriptCore/runtime/ArgList.h:
(JSC::ArgList::ArgList):
* Source/JavaScriptCore/runtime/StringPrototype.cpp:
(JSC::splitStringByOneCharacterImpl):
(JSC::JSC_DEFINE_HOST_FUNCTION):
* Source/JavaScriptCore/runtime/StringSplitCache.h: Added.
(JSC::StringSplitCache::clear):
* Source/JavaScriptCore/runtime/StringSplitCacheInlines.h: Added.
(JSC::StringSplitCache::get):
(JSC::StringSplitCache::set):
* Source/JavaScriptCore/runtime/VM.cpp:
(JSC::VM::VM):
* Source/JavaScriptCore/runtime/VM.h:

Canonical link: https://commits.webkit.org/264749@main
  • Loading branch information
Constellation committed May 31, 2023
1 parent 3539f60 commit e4568fa
Show file tree
Hide file tree
Showing 9 changed files with 271 additions and 66 deletions.
1 change: 1 addition & 0 deletions Source/JavaScriptCore/CMakeLists.txt
Expand Up @@ -1209,6 +1209,7 @@ set(JavaScriptCore_PRIVATE_FRAMEWORK_HEADERS
runtime/StackFrame.h
runtime/StringObject.h
runtime/StringPrototype.h
runtime/StringSplitCache.h
runtime/Structure.h
runtime/StructureCache.h
runtime/StructureChain.h
Expand Down
Expand Up @@ -1886,6 +1886,8 @@
E35E89FE25C50F870071EE1E /* BigUint64Array.h in Headers */ = {isa = PBXBuildFile; fileRef = E35E89FC25C50F870071EE1E /* BigUint64Array.h */; settings = {ATTRIBUTES = (Private, ); }; };
E3637EE9236E56B00096BD0A /* LinkTimeConstant.h in Headers */ = {isa = PBXBuildFile; fileRef = E3637EE7236E56B00096BD0A /* LinkTimeConstant.h */; settings = {ATTRIBUTES = (Private, ); }; };
E366441E254409B30001876F /* IntlListFormat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E318CA69254406B5004DC129 /* IntlListFormat.cpp */; };
E367062A2A2705DB00CF892F /* StringSplitCacheInlines.h in Headers */ = {isa = PBXBuildFile; fileRef = E36706282A2705DB00CF892F /* StringSplitCacheInlines.h */; };
E367062B2A2705DB00CF892F /* StringSplitCache.h in Headers */ = {isa = PBXBuildFile; fileRef = E36706292A2705DB00CF892F /* StringSplitCache.h */; settings = {ATTRIBUTES = (Private, ); }; };
E36CC9472086314F0051FFD6 /* WasmCreationMode.h in Headers */ = {isa = PBXBuildFile; fileRef = E36CC9462086314F0051FFD6 /* WasmCreationMode.h */; settings = {ATTRIBUTES = (Private, ); }; };
E36EDCE524F0975700E60DA2 /* Concurrency.h in Headers */ = {isa = PBXBuildFile; fileRef = E36EDCE424F0975700E60DA2 /* Concurrency.h */; settings = {ATTRIBUTES = (Private, ); }; };
E3711992253FA87F00BA69A0 /* Gate.h in Headers */ = {isa = PBXBuildFile; fileRef = E3711991253FA87E00BA69A0 /* Gate.h */; settings = {ATTRIBUTES = (Private, ); }; };
Expand Down Expand Up @@ -5379,6 +5381,8 @@
E3637EE8236E56B00096BD0A /* LinkTimeConstant.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LinkTimeConstant.cpp; sourceTree = "<group>"; };
E365F33824AA621100C991B2 /* IntlDisplayNamesPrototype.lut.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IntlDisplayNamesPrototype.lut.h; sourceTree = "<group>"; };
E365F33924AA621200C991B2 /* IntlDisplayNamesConstructor.lut.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IntlDisplayNamesConstructor.lut.h; sourceTree = "<group>"; };
E36706282A2705DB00CF892F /* StringSplitCacheInlines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StringSplitCacheInlines.h; sourceTree = "<group>"; };
E36706292A2705DB00CF892F /* StringSplitCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = StringSplitCache.h; sourceTree = "<group>"; };
E36B480123E9573800E4A66E /* UnlinkedCodeBlockGenerator.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = UnlinkedCodeBlockGenerator.cpp; sourceTree = "<group>"; };
E36CC9462086314F0051FFD6 /* WasmCreationMode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WasmCreationMode.h; sourceTree = "<group>"; };
E36EDCE424F0975700E60DA2 /* Concurrency.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Concurrency.h; sourceTree = "<group>"; };
Expand Down Expand Up @@ -8257,6 +8261,8 @@
E325A35F2221158A007349A1 /* StringPrototypeInlines.h */,
93345A8712D838C400302BE3 /* StringRecursionChecker.cpp */,
93345A8812D838C400302BE3 /* StringRecursionChecker.h */,
E36706292A2705DB00CF892F /* StringSplitCache.h */,
E36706282A2705DB00CF892F /* StringSplitCacheInlines.h */,
BCDE3AB00E6C82CF001453A7 /* Structure.cpp */,
BCDE3AB10E6C82CF001453A7 /* Structure.h */,
798694391F8C0AC7009232AE /* StructureCache.cpp */,
Expand Down Expand Up @@ -11092,6 +11098,8 @@
BC18C4680E16F5CD00B34460 /* StringObject.h in Headers */,
BC18C46A0E16F5CD00B34460 /* StringPrototype.h in Headers */,
E325A36022211590007349A1 /* StringPrototypeInlines.h in Headers */,
E367062B2A2705DB00CF892F /* StringSplitCache.h in Headers */,
E367062A2A2705DB00CF892F /* StringSplitCacheInlines.h in Headers */,
142E313B134FF0A600AFADB5 /* Strong.h in Headers */,
CD1F9B4B270CFE0F00617EB6 /* StrongForward.h in Headers */,
145722861437E140005FDE26 /* StrongInlines.h in Headers */,
Expand Down
1 change: 1 addition & 0 deletions Source/JavaScriptCore/heap/Heap.cpp
Expand Up @@ -2182,6 +2182,7 @@ void Heap::finalize()
if (m_lastCollectionScope && m_lastCollectionScope.value() == CollectionScope::Full)
vm().jsonAtomStringCache.clear();
vm().keyAtomStringCache.clear();
vm().stringSplitCache.clear();
vm().numericStrings.clearOnGarbageCollection();

m_possiblyAccessedStringsFromConcurrentThreads.clear();
Expand Down
3 changes: 2 additions & 1 deletion Source/JavaScriptCore/runtime/ArgList.h
Expand Up @@ -248,7 +248,8 @@ class ArgList {
ASSERT(static_cast<int>(callFrame->argumentCount()) >= startingFrom);
}

ArgList(const MarkedArgumentBuffer& args)
template<size_t inlineCapacity>
ArgList(const MarkedVector<JSValue, inlineCapacity, RecordOverflow>& args)
: m_args(args.m_buffer)
, m_argCount(args.size())
{
Expand Down
166 changes: 101 additions & 65 deletions Source/JavaScriptCore/runtime/StringPrototype.cpp
Expand Up @@ -39,6 +39,7 @@
#include "RegExpConstructor.h"
#include "RegExpGlobalDataInlines.h"
#include "StringPrototypeInlines.h"
#include "StringSplitCacheInlines.h"
#include "SuperSampler.h"
#include "VMEntryScopeInlines.h"
#include <algorithm>
Expand Down Expand Up @@ -1053,31 +1054,27 @@ JSC_DEFINE_HOST_FUNCTION(stringProtoFuncSlice, (JSGlobalObject* globalObject, Ca
}

// Return true in case of early return (resultLength got to limitLength).
template<typename CharacterType>
static ALWAYS_INLINE bool splitStringByOneCharacterImpl(JSGlobalObject* globalObject, JSArray* result, JSValue originalValue, const String& input, StringImpl* string, UChar separatorCharacter, size_t& position, unsigned& resultLength, unsigned limitLength)
template<typename CharacterType, typename Indice>
static ALWAYS_INLINE bool splitStringByOneCharacterImpl(Indice& result, StringImpl* string, UChar separatorCharacter, unsigned limitLength)
{
VM& vm = globalObject->vm();
auto scope = DECLARE_THROW_SCOPE(vm);

// 12. Let q = p.
size_t matchPosition;
size_t position = 0;
unsigned stringLength = string->length();
const CharacterType* characters = string->characters<CharacterType>();
// 13. Repeat, while q != s
// a. Call SplitMatch(S, q, R) and let z be its MatchResult result.
// b. If z is failure, then let q = q+1.
// c. Else, z is not failure
while ((matchPosition = WTF::find(characters, string->length(), separatorCharacter, position)) != notFound) {
while ((matchPosition = WTF::find(characters, stringLength, separatorCharacter, position)) != notFound) {
// 1. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
// through q (exclusive).
// 2. Call the [[DefineOwnProperty]] internal method of A with arguments ToString(lengthA),
// Property Descriptor {[[Value]]: T, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true}, and false.
auto* substring = jsSubstring(globalObject, originalValue, input, position, matchPosition - position);
RETURN_IF_EXCEPTION(scope, false);
result->putDirectIndex(globalObject, resultLength, substring);
RETURN_IF_EXCEPTION(scope, false);
result.append(matchPosition);
// 3. Increment lengthA by 1.
// 4. If lengthA == lim, return A.
if (++resultLength == limitLength)
if (result.size() == limitLength)
return true;

// 5. Let p = e.
Expand All @@ -1097,70 +1094,118 @@ JSC_DEFINE_HOST_FUNCTION(stringProtoFuncSplitFast, (JSGlobalObject* globalObject

// 3. Let S be the result of calling ToString, giving it the this value as its argument.
// 7. Let s be the number of characters in S.
String input = thisValue.toWTFString(globalObject);
RETURN_IF_EXCEPTION(scope, encodedJSValue());
JSString* thisString = thisValue.toString(globalObject);
RETURN_IF_EXCEPTION(scope, { });
String input = thisString->value(globalObject);
RETURN_IF_EXCEPTION(scope, { });
ASSERT(!input.isNull());

// 4. Let A be a new array created as if by the expression new Array()
// where Array is the standard built-in constructor with that name.
JSArray* result = constructEmptyArray(globalObject, nullptr);
RETURN_IF_EXCEPTION(scope, encodedJSValue());

// 5. Let lengthA be 0.
unsigned resultLength = 0;

// 6. If limit is undefined, let lim = 2^32-1; else let lim = ToUint32(limit).
JSValue limitValue = callFrame->uncheckedArgument(1);
unsigned limit = limitValue.isUndefined() ? 0xFFFFFFFFu : limitValue.toUInt32(globalObject);
RETURN_IF_EXCEPTION(scope, encodedJSValue());

// 8. Let p = 0.
size_t position = 0;
unsigned limit = 0xFFFFFFFFu;
if (!limitValue.isUndefined()) {
limit = limitValue.toUInt32(globalObject);
RETURN_IF_EXCEPTION(scope, { });
}

// 9. If separator is a RegExp object (its [[Class]] is "RegExp"), let R = separator;
// otherwise let R = ToString(separator).
JSValue separatorValue = callFrame->uncheckedArgument(0);
String separator = separatorValue.toWTFString(globalObject);
RETURN_IF_EXCEPTION(scope, encodedJSValue());
RETURN_IF_EXCEPTION(scope, { });
unsigned separatorLength = separator.length();

// 10. If lim == 0, return A.
if (!limit)
return JSValue::encode(result);
RELEASE_AND_RETURN(scope, JSValue::encode(constructEmptyArray(globalObject, nullptr)));

// 11. If separator is undefined, then
if (separatorValue.isUndefined()) {
// a. Call the [[DefineOwnProperty]] internal method of A with arguments "0",
scope.release();
result->putDirectIndex(globalObject, 0, jsStringWithReuse(globalObject, thisValue, input));
MarkedArgumentBuffer result;
result.appendWithCrashOnOverflow(jsStringWithReuse(globalObject, thisString, input));
RETURN_IF_EXCEPTION(scope, { });
// b. Return A.
return JSValue::encode(result);
RELEASE_AND_RETURN(scope, JSValue::encode(constructArray(globalObject, static_cast<ArrayAllocationProfile*>(nullptr), result)));
}

if (LIKELY(limit == 0xFFFFFFFFu && !globalObject->isHavingABadTime())) {
if (auto* immutableButterfly = vm.stringSplitCache.get(input, separator)) {
Structure* arrayStructure = globalObject->originalArrayStructureForIndexingType(CopyOnWriteArrayWithContiguous);
return JSValue::encode(JSArray::createWithButterfly(vm, nullptr, arrayStructure, immutableButterfly->toButterfly()));
}
}

auto& result = vm.stringSplitIndice;
result.resize(0);

auto cacheAndCreateArray = [&]() -> JSArray* {
if (result.isEmpty())
return constructEmptyArray(globalObject, nullptr);

if (LIKELY(limit == 0xFFFFFFFFu && !globalObject->isHavingABadTime() && result.size() < MIN_SPARSE_ARRAY_INDEX)) {
auto* newButterfly = JSImmutableButterfly::create(vm, CopyOnWriteArrayWithContiguous, result.size());
unsigned start = 0;
for (unsigned i = 0, size = result.size(); i < size; ++i) {
unsigned end = result[i];
auto* string = jsSubstring(globalObject, thisString, start, end - start);
RETURN_IF_EXCEPTION(scope, { });
newButterfly->setIndex(vm, i, string);
start = end + separatorLength;
}
vm.stringSplitCache.set(input, separator, newButterfly);
Structure* arrayStructure = globalObject->originalArrayStructureForIndexingType(CopyOnWriteArrayWithContiguous);
return JSArray::createWithButterfly(vm, nullptr, arrayStructure, newButterfly->toButterfly());
}

auto* array = constructEmptyArray(globalObject, static_cast<ArrayAllocationProfile*>(nullptr), result.size());
RETURN_IF_EXCEPTION(scope, { });
unsigned start = 0;
for (unsigned i = 0, size = result.size(); i < size; ++i) {
unsigned end = result[i];
auto* string = jsSubstring(globalObject, thisString, start, end - start);
RETURN_IF_EXCEPTION(scope, { });
array->putDirectIndex(globalObject, i, string);
RETURN_IF_EXCEPTION(scope, { });
start = end + separatorLength;
}
return array;
};

// 12. If s == 0, then
if (input.isEmpty()) {
// a. Let z be SplitMatch(S, 0, R) where S is input, R is separator.
// b. If z is not false, return A.
// c. Call CreateDataProperty(A, "0", S).
// d. Return A.
if (!separator.isEmpty()) {
scope.release();
result->putDirectIndex(globalObject, 0, jsStringWithReuse(globalObject, thisValue, input));
}
return JSValue::encode(result);
scope.release();
if (!separator.isEmpty())
result.append(input.length());
return JSValue::encode(cacheAndCreateArray());
}

// Optimized case for splitting on the empty string.
if (separator.isEmpty()) {
limit = std::min(limit, input.length());
if (!separatorLength) {
unsigned resultSize = std::min(limit, input.length());
// Zero limit/input length handled in steps 10/12 respectively, above.
ASSERT(limit);

do {
result->putDirectIndex(globalObject, position, jsSingleCharacterString(vm, input[position]));
RETURN_IF_EXCEPTION(scope, encodedJSValue());
} while (++position < limit);
ASSERT(resultSize);

if (LIKELY(limit == 0xFFFFFFFFu && !globalObject->isHavingABadTime() && resultSize < MIN_SPARSE_ARRAY_INDEX)) {
auto* newButterfly = JSImmutableButterfly::create(vm, CopyOnWriteArrayWithContiguous, resultSize);
for (unsigned i = 0; i < resultSize; ++i)
newButterfly->setIndex(vm, i, jsSingleCharacterString(vm, input[i]));
vm.stringSplitCache.set(input, separator, newButterfly);
Structure* arrayStructure = globalObject->originalArrayStructureForIndexingType(CopyOnWriteArrayWithContiguous);
return JSValue::encode(JSArray::createWithButterfly(vm, nullptr, arrayStructure, newButterfly->toButterfly()));
}

return JSValue::encode(result);
auto* array = constructEmptyArray(globalObject, static_cast<ArrayAllocationProfile*>(nullptr), resultSize);
RETURN_IF_EXCEPTION(scope, { });
for (unsigned i = 0; i < resultSize; ++i) {
array->putDirectIndex(globalObject, i, jsSingleCharacterString(vm, input[i]));
RETURN_IF_EXCEPTION(scope, { });
}
return JSValue::encode(array);
}

// 3 cases:
Expand All @@ -1169,7 +1214,6 @@ JSC_DEFINE_HOST_FUNCTION(stringProtoFuncSplitFast, (JSGlobalObject* globalObject
// -separator length > 1
StringImpl* stringImpl = input.impl();
StringImpl* separatorImpl = separator.impl();
size_t separatorLength = separatorImpl->length();

if (separatorLength == 1) {
UChar separatorCharacter;
Expand All @@ -1179,49 +1223,41 @@ JSC_DEFINE_HOST_FUNCTION(stringProtoFuncSplitFast, (JSGlobalObject* globalObject
separatorCharacter = separatorImpl->characters16()[0];

if (stringImpl->is8Bit()) {
if (splitStringByOneCharacterImpl<LChar>(globalObject, result, thisValue, input, stringImpl, separatorCharacter, position, resultLength, limit))
RELEASE_AND_RETURN(scope, JSValue::encode(result));
if (splitStringByOneCharacterImpl<LChar>(result, stringImpl, separatorCharacter, limit))
RELEASE_AND_RETURN(scope, JSValue::encode(cacheAndCreateArray()));
} else {
if (splitStringByOneCharacterImpl<UChar>(globalObject, result, thisValue, input, stringImpl, separatorCharacter, position, resultLength, limit))
RELEASE_AND_RETURN(scope, JSValue::encode(result));
if (splitStringByOneCharacterImpl<UChar>(result, stringImpl, separatorCharacter, limit))
RELEASE_AND_RETURN(scope, JSValue::encode(cacheAndCreateArray()));
}
RETURN_IF_EXCEPTION(scope, encodedJSValue());
} else {
// 13. Let q = p.
size_t matchPosition;
// 14. Repeat, while q != s
// a. let e be SplitMatch(S, q, R).
// b. If e is failure, then let q = q+1.
// c. Else, e is an integer index <= s.
size_t position = 0;
while ((matchPosition = stringImpl->find(separatorImpl, position)) != notFound) {
// 1. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
// through q (exclusive).
// 2. Call CreateDataProperty(A, ToString(lengthA), T).
auto* substring = jsSubstring(globalObject, thisValue, input, position, matchPosition - position);
RETURN_IF_EXCEPTION(scope, { });
result->putDirectIndex(globalObject, resultLength, substring);
RETURN_IF_EXCEPTION(scope, { });
result.append(matchPosition);
// 3. Increment lengthA by 1.
// 4. If lengthA == lim, return A.
if (++resultLength == limit)
return JSValue::encode(result);
if (result.size() == limit)
RELEASE_AND_RETURN(scope, JSValue::encode(cacheAndCreateArray()));

// 5. Let p = e.
// 6. Let q = p.
position = matchPosition + separator.length();
position = matchPosition + separatorLength;
}
}

// 15. Let T be a String value equal to the substring of S consisting of the characters at positions p (inclusive)
// through s (exclusive).
// 16. Call CreateDataProperty(A, ToString(lengthA), T).
auto* substring = jsSubstring(globalObject, thisValue, input, position, input.length() - position);
RETURN_IF_EXCEPTION(scope, { });
scope.release();
result->putDirectIndex(globalObject, resultLength++, substring);

// 17. Return A.
return JSValue::encode(result);
result.append(input.length());
RELEASE_AND_RETURN(scope, JSValue::encode(cacheAndCreateArray()));
}

JSC_DEFINE_HOST_FUNCTION(stringProtoFuncSubstr, (JSGlobalObject* globalObject, CallFrame* callFrame))
Expand Down

0 comments on commit e4568fa

Please sign in to comment.