diff --git a/src/ir/names.h b/src/ir/names.h index 61fc137f5a8..083a54f0ef5 100644 --- a/src/ir/names.h +++ b/src/ir/names.h @@ -56,7 +56,7 @@ inline Name getValidName(Name root, if (check(root)) { return root; } - auto prefixed = std::string(root.str) + separator; + auto prefixed = std::string(root.view()) + separator; Index num = hint; while (1) { auto name = prefixed + std::to_string(num); diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 3e1213cd05b..4b76f467825 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -1211,8 +1211,7 @@ struct InfoCollector addRoot(curr, PossibleContents::exactType(curr->type)); } void visitStringConst(StringConst* curr) { - addRoot(curr, - PossibleContents::literal(Literal(std::string(curr->string.str)))); + addRoot(curr, PossibleContents::literal(Literal(curr->string.view()))); } void visitStringMeasure(StringMeasure* curr) { // TODO: optimize when possible diff --git a/src/parser/contexts.h b/src/parser/contexts.h index eac35a543c5..06a515e32d1 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -1996,7 +1996,7 @@ struct ParseDefsCtx : TypeParserCtx, AnnotationParserCtx { void setSrcLoc(const std::vector& annotations) { const Annotation* annotation = nullptr; for (auto& a : annotations) { - if (a.kind.str == std::string_view("src")) { + if (a.kind.view() == std::string_view("src")) { annotation = &a; } } diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp index a8410a24c05..feea9adf7c2 100644 --- a/src/passes/Asyncify.cpp +++ b/src/passes/Asyncify.cpp @@ -1730,7 +1730,7 @@ struct AsyncifyLocals : public WalkerPass> { } // anonymous namespace static std::string getFullImportName(Name module, Name base) { - return std::string(module.str) + '.' + base.toString(); + return module.toString() + '.' + base.toString(); } struct Asyncify : public Pass { diff --git a/src/passes/J2CLOpts.cpp b/src/passes/J2CLOpts.cpp index 759ef7cc287..0474a31ef74 100644 --- a/src/passes/J2CLOpts.cpp +++ b/src/passes/J2CLOpts.cpp @@ -197,7 +197,7 @@ class ConstantHoister : public WalkerPass> { } Name getEnclosingClass(Name name) { - return Name(name.str.substr(name.str.find_last_of('@'))); + return Name(name.view().substr(name.view().find_last_of('@'))); } AssignmentCountMap& assignmentCounts; diff --git a/src/passes/MinifyImportsAndExports.cpp b/src/passes/MinifyImportsAndExports.cpp index 882f9a8b5d9..8c43ee694c3 100644 --- a/src/passes/MinifyImportsAndExports.cpp +++ b/src/passes/MinifyImportsAndExports.cpp @@ -112,9 +112,9 @@ struct MinifyImportsAndExports : public Pass { std::cout << ','; } std::cout << "\n ["; - String::printEscaped(std::cout, key.first.str) << ", "; - String::printEscaped(std::cout, key.second.str) << ", "; - String::printEscaped(std::cout, new_.str) << "]"; + String::printEscaped(std::cout, key.first.view()) << ", "; + String::printEscaped(std::cout, key.second.view()) << ", "; + String::printEscaped(std::cout, new_.view()) << "]"; } } std::cout << "\n ],\n\"exports\": ["; @@ -127,8 +127,8 @@ struct MinifyImportsAndExports : public Pass { std::cout << ','; } std::cout << "\n ["; - String::printEscaped(std::cout, key.second.str) << ", "; - String::printEscaped(std::cout, new_.str) << "]"; + String::printEscaped(std::cout, key.second.view()) << ", "; + String::printEscaped(std::cout, new_.view()) << "]"; } } std::cout << "\n ]\n"; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 4ffdb031dd4..d22168f359d 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2566,7 +2566,7 @@ struct PrintExpressionContents // Re-encode from WTF-16 to WTF-8. std::stringstream wtf8; [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(wtf8, curr->string.str); + String::convertWTF16ToWTF8(wtf8, curr->string.view()); assert(valid); // TODO: Use wtf8.view() once we have C++20. String::printEscaped(o, wtf8.str()); @@ -3190,7 +3190,7 @@ void PrintSExpression::visitExport(Export* curr) { o << '('; printMedium(o, "export "); std::stringstream escaped; - String::printEscaped(escaped, curr->name.str); + String::printEscaped(escaped, curr->name.view()); printText(o, escaped.str(), false) << " ("; switch (curr->kind) { case ExternalKind::Function: @@ -3219,8 +3219,8 @@ void PrintSExpression::visitExport(Export* curr) { void PrintSExpression::emitImportHeader(Importable* curr) { printMedium(o, "import "); std::stringstream escapedModule, escapedBase; - String::printEscaped(escapedModule, curr->module.str); - String::printEscaped(escapedBase, curr->base.str); + String::printEscaped(escapedModule, curr->module.view()); + String::printEscaped(escapedBase, curr->base.view()); printText(o, escapedModule.str(), false) << ' '; printText(o, escapedBase.str(), false) << ' '; } diff --git a/src/passes/StringLifting.cpp b/src/passes/StringLifting.cpp index cd3a8ffabb6..ac15d2a8614 100644 --- a/src/passes/StringLifting.cpp +++ b/src/passes/StringLifting.cpp @@ -70,11 +70,11 @@ struct StringLifting : public Pass { // Encode from WTF-8 to WTF-16. auto wtf8 = global->base; std::stringstream wtf16; - bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.str); + bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.view()); if (!valid) { Fatal() << "Bad string to lift: " << wtf8; } - importedStrings[global->name] = wtf16.str(); + importedStrings[global->name] = wtf16.view(); found = true; } } @@ -101,7 +101,7 @@ struct StringLifting : public Pass { continue; } // The index in the array is the basename. - Index index = std::stoi(std::string(global->base.str)); + Index index = std::stoi(std::string(global->base.view())); if (index >= array.size()) { Fatal() << "StringLifting: bad index in string.const section"; } @@ -222,7 +222,7 @@ struct StringLifting : public Pass { auto iter = parent.importedStrings.find(curr->name); if (iter != parent.importedStrings.end()) { auto wtf16 = iter->second; - replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.str)); + replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.view())); modified = true; } } diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index b4641bda134..bd753efaf91 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -153,7 +153,7 @@ struct StringGathering : public Pass { // Re-encode from WTF-16 to WTF-8 to make the name easier to read. std::stringstream wtf8; [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(wtf8, string.str); + String::convertWTF16ToWTF8(wtf8, string.view()); assert(valid); // Then escape it because identifiers must be valid UTF-8. // TODO: Use wtf8.view() and escaped.view() once we have C++20. @@ -246,7 +246,7 @@ struct StringLowering : public StringGathering { if (auto* c = global->init->dynCast()) { std::stringstream utf8; if (useMagicImports && - String::convertUTF16ToUTF8(utf8, c->string.str)) { + String::convertUTF16ToUTF8(utf8, c->string.view())) { global->module = stringConstsModule; global->base = Name(utf8.str()); } else { @@ -263,7 +263,7 @@ struct StringLowering : public StringGathering { } else { json << ','; } - String::printEscapedJSON(json, c->string.str); + String::printEscapedJSON(json, c->string.view()); jsonImportIndex++; } global->init = nullptr; diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 60c8b59be6d..9156ff84b03 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -14,32 +14,43 @@ * limitations under the License. */ +#include +#include + #include "istring.h" #include "mixed_arena.h" namespace wasm { -std::string_view IString::interned(std::string_view s, bool reuse) { - // We need a set of string_views that can be modified in-place to minimize - // the number of lookups we do. Since set elements cannot normally be - // modified, wrap the string_views in a container that provides mutability - // even through a const reference. - struct MutStringView { - mutable std::string_view str; - MutStringView(std::string_view str) : str(str) {} - }; - struct MutStringViewHash { - size_t operator()(const MutStringView& mut) const { - return std::hash{}(mut.str); +const char* IString::interned(std::string_view s) { + if (s.data() == nullptr) { + return nullptr; + } + + // A set of interned Views, i.e., that contains our pascal-style strings. We + // need to query this using a std::string_view, as that is what we receive as + // input (turning it into pascal-style storage would add overhead). To do so, + // use overloading in the hash and equality functions (which works thanks to + // `is_transparent`). + struct InternedHash { + using is_transparent = void; + size_t operator()(View v) const { + return std::hash{}(v.view()); + } + size_t operator()(std::string_view sv) const { + return std::hash{}(sv); } }; - struct MutStringViewEqual { - bool operator()(const MutStringView& a, const MutStringView& b) const { - return a.str == b.str; + struct InternedEqual { + using is_transparent = void; + bool operator()(View a, View b) const { return a.view() == b.view(); } + bool operator()(std::string_view a, View b) const { return a == b.view(); } + bool operator()(View a, std::string_view b) const { return a.view() == b; } + bool operator()(std::string_view a, std::string_view b) const { + return a == b; } }; - using StringSet = - std::unordered_set; + using StringSet = std::unordered_set; // The authoritative global set of interned string views. static StringSet globalStrings; @@ -54,34 +65,37 @@ std::string_view IString::interned(std::string_view s, bool reuse) { // A thread-local cache of strings to reduce contention. thread_local static StringSet localStrings; - auto [localIt, localInserted] = localStrings.insert(s); - if (!localInserted) { + if (auto it = localStrings.find(s); it != localStrings.end()) { // We already had a local copy of this string. - return localIt->str; + return it->internal; } // No copy yet in the local cache. Check the global cache. std::unique_lock lock(mutex); - auto [globalIt, globalInserted] = globalStrings.insert(s); - if (!globalInserted) { + if (auto it = globalStrings.find(s); it != globalStrings.end()) { // We already had a global copy of this string. Cache it locally. - localIt->str = globalIt->str; - return localIt->str; + localStrings.insert(*it); + return it->internal; } - if (!reuse) { - // We have a new string, but it doesn't have a stable address. Create a copy - // of the data at a stable address we can use. Make sure it is null - // terminated so legacy uses that get a C string still work. - char* data = (char*)arena.allocSpace(s.size() + 1, 1); - std::copy(s.begin(), s.end(), data); - data[s.size()] = '\0'; - s = std::string_view(data, s.size()); - } + // We have a new string. Create a copy of the data at a stable address with a + // header we can use. Make sure it is null terminated so legacy uses that get + // a C string still work. + size_t size = s.size(); + // The string's size must fit in 32 bits. + assert(size <= std::numeric_limits::max()); + char* buffer = + (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); + *(uint32_t*)(buffer) = size; + char* data = buffer + sizeof(uint32_t); + std::copy(s.begin(), s.end(), data); + data[size] = '\0'; // Intern our new string. - localIt->str = globalIt->str = s; - return s; + View v{data}; + globalStrings.insert(v); + localStrings.insert(v); + return data; } } // namespace wasm diff --git a/src/support/istring.h b/src/support/istring.h index dad20b8fb00..a567eb82f12 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -33,21 +33,49 @@ namespace wasm { struct IString { private: - static std::string_view interned(std::string_view s, bool reuse = true); + static const char* interned(std::string_view s); public: - const std::string_view str; + // Strings are stored in Pascal style: a size followed by the characters. We + // keep the internal pointer pointing to the data, so that data() is a no-op; + // computing the size, which is more rare, requires looking back and doing a + // load. + // + // The size is limited to 4 bytes, so the maximum string we support is 4GB. + // + // The alternative approach of using a string_view here, i.e., keeping the + // pointer and size in the IString, uses more more memory. That is, this + // optimization saves a lot of space, because while it adds 4 bytes to each + // interned string itself, we tend to have many views on each. + // + // We provide a View here, which is a simple interface. Users that need more + // convert to a std::string_view with .view() or a cast. + struct View { + const char* internal = nullptr; + const char* data() const { return internal; } + size_t size() const { + return internal ? *(const uint32_t*)(internal - 4) : 0; + } + char operator[](size_t x) const { return internal[x]; } + std::string_view view() const { + if (!internal) { + // No size to read. + return {}; + } + return {internal, size()}; + } + }; + const View str; + + std::string_view view() const { return str.view(); } IString() = default; - // TODO: This is a wildly unsafe default inherited from the previous - // implementation. Change it? - IString(std::string_view str, bool reuse = true) - : str(interned(str, reuse)) {} + IString(View v) : str(v) {} - // But other C strings generally do need to be copied. - IString(const char* str) : str(interned(str, false)) {} - IString(const std::string& str) : str(interned(str, false)) {} + IString(std::string_view s) : str{interned(s)} {} + IString(const char* str) : str{interned(str)} {} + IString(const std::string& str) : str{interned(str)} {} IString(const IString& other) = default; @@ -57,17 +85,24 @@ struct IString { bool operator==(const IString& other) const { // Fast! No need to compare contents due to interning - return str.data() == other.str.data(); + return str.internal == other.str.internal; } bool operator!=(const IString& other) const { return !(*this == other); } - bool operator<(const IString& other) const { return str < other.str; } - bool operator<=(const IString& other) const { return str <= other.str; } - bool operator>(const IString& other) const { return str > other.str; } - bool operator>=(const IString& other) const { return str >= other.str; } + bool operator<(const IString& other) const { + if (str.internal == other.str.internal) { + return false; + } + return view() < other.view(); + } + bool operator<=(const IString& other) const { + return *this == other || *this < other; + } + bool operator>(const IString& other) const { return !(*this <= other); } + bool operator>=(const IString& other) const { return !(*this < other); } char operator[](int x) const { return str[x]; } - explicit operator bool() const { return str.data() != nullptr; } + explicit operator bool() const { return str.internal != nullptr; } // TODO: deprecate? bool is() const { return bool(*this); } @@ -75,13 +110,13 @@ struct IString { std::string toString() const { return {str.data(), str.size()}; } - bool equals(std::string_view other) const { return str == other; } + bool equals(std::string_view other) const { return str.view() == other; } bool startsWith(std::string_view prefix) const { // TODO: Use C++20 `starts_with`. - return str.substr(0, prefix.size()) == prefix; + return view().substr(0, prefix.size()) == prefix; } - bool startsWith(IString str) const { return startsWith(str.str); } + bool startsWith(IString other) const { return startsWith(other.view()); } // Disambiguate for string literals. template bool startsWith(const char (&str)[N]) const { @@ -93,9 +128,9 @@ struct IString { if (suffix.size() > str.size()) { return false; } - return str.substr(str.size() - suffix.size()) == suffix; + return view().substr(str.size() - suffix.size()) == suffix; } - bool endsWith(IString str) const { return endsWith(str.str); } + bool endsWith(IString other) const { return endsWith(other.view()); } // Disambiguate for string literals. template bool endsWith(const char (&str)[N]) const { @@ -103,7 +138,7 @@ struct IString { } IString substr(size_t pos, size_t len = std::string_view::npos) const { - return IString(str.substr(pos, len)); + return IString(view().substr(pos, len)); } size_t size() const { return str.size(); } @@ -120,7 +155,7 @@ template<> struct hash { }; inline std::ostream& operator<<(std::ostream& os, const wasm::IString& str) { - return os << str.str; + return os << str.view(); } } // namespace std diff --git a/src/support/json.cpp b/src/support/json.cpp index dd94719d47d..ff393317410 100644 --- a/src/support/json.cpp +++ b/src/support/json.cpp @@ -23,7 +23,7 @@ void Value::stringify(std::ostream& os, bool pretty) { if (isString()) { std::stringstream wtf16; [[maybe_unused]] bool valid = - wasm::String::convertWTF8ToWTF16(wtf16, getIString().str); + wasm::String::convertWTF8ToWTF16(wtf16, getIString().view()); assert(valid); // TODO: Use wtf16.view() once we have C++20. wasm::String::printEscapedJSON(os, wtf16.str()); diff --git a/src/support/name.cpp b/src/support/name.cpp index 4c599defca0..34b7546fba9 100644 --- a/src/support/name.cpp +++ b/src/support/name.cpp @@ -46,6 +46,7 @@ std::ostream& Name::print(std::ostream& o) const { // TODO: This is not spec-compliant since the spec does not yet support // quoted identifiers and has a limited set of valid idchars. o << '$'; + auto str = view(); if (size() >= 1 && std::all_of(str.begin(), str.end(), isIDChar)) { return o << str; } else { diff --git a/src/support/name.h b/src/support/name.h index 8e3f7a291d5..d6c71f732c9 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -33,9 +33,10 @@ namespace wasm { struct Name : public IString { Name() : IString() {} - Name(std::string_view str) : IString(str, false) {} - Name(const char* str) : IString(str, false) {} + Name(std::string_view str) : IString(str) {} + Name(const char* str) : IString(str) {} Name(IString str) : IString(str) {} + Name(IString::View str) : IString(str) {} Name(const std::string& str) : IString(str) {} // String literals do not need to be copied. Note: Not safe to construct from @@ -50,13 +51,11 @@ struct Name : public IString { } } - static Name fromInt(size_t i) { - return IString(std::to_string(i).c_str(), false); - } + static Name fromInt(size_t i) { return IString(std::to_string(i).c_str()); } bool hasSubstring(IString substring) { // TODO: Use C++23 `contains`. - return str.find(substring.str) != std::string_view::npos; + return view().find(substring.view()) != std::string_view::npos; } std::ostream& print(std::ostream& o) const; diff --git a/src/tools/execution-results.h b/src/tools/execution-results.h index 7717ec23811..09f7657ee96 100644 --- a/src/tools/execution-results.h +++ b/src/tools/execution-results.h @@ -412,7 +412,7 @@ class FuzzerImportResolver // fuzz_shell.js. Index payload = 0; for (auto name : {name.module, name.name}) { - for (auto c : name.str) { + for (auto c : name.view()) { payload = (payload + static_cast(c)) % 251; } } diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp index 96b2063df4b..29bba15aaf8 100644 --- a/src/tools/wasm-metadce.cpp +++ b/src/tools/wasm-metadce.cpp @@ -78,7 +78,7 @@ struct MetaDCEGraph { // to be kept alive. module = ENV; } - return std::string(module.str) + " (*) " + std::string(base.str); + return std::string(module.view()) + " (*) " + std::string(base.view()); } ImportId getImportId(ModuleItemKind kind, Name name) { diff --git a/src/tools/wasm2c-wrapper.h b/src/tools/wasm2c-wrapper.h index 242442a1d8a..e39bf54a3d2 100644 --- a/src/tools/wasm2c-wrapper.h +++ b/src/tools/wasm2c-wrapper.h @@ -30,7 +30,7 @@ namespace wasm { inline std::string wasm2cMangle(Name name, Signature sig) { const char escapePrefix = 'Z'; std::string mangled = "Z_"; - for (unsigned char c : name.str) { + for (unsigned char c : name.view()) { if ((isalnum(c) && c != escapePrefix) || c == '_') { // This character is ok to emit as it is. mangled += c; diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 67101d641e8..2f122ac3ea7 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -121,7 +121,7 @@ class Flow { } friend std::ostream& operator<<(std::ostream& o, const Flow& flow) { - o << "(flow " << (flow.breakTo.is() ? flow.breakTo.str : "-") << " : {"; + o << "(flow " << (flow.breakTo.is() ? flow.breakTo.view() : "-") << " : {"; for (size_t i = 0; i < flow.values.size(); ++i) { if (i > 0) { o << ", "; @@ -2727,7 +2727,9 @@ class ExpressionRunner : public OverriddenVisitor { return Flow(NONCONSTANT_FLOW); } } - Flow visitStringConst(StringConst* curr) { return Literal(curr->string.str); } + Flow visitStringConst(StringConst* curr) { + return Literal(curr->string.view()); + } Flow visitStringMeasure(StringMeasure* curr) { // For now we only support JS-style strings. diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 61d505bd205..6bcf9bd5fd2 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -207,7 +207,7 @@ void WasmBinaryWriter::writeStart() { return; } auto start = startSection(BinaryConsts::Section::Start); - o << U32LEB(getFunctionIndex(wasm->start.str)); + o << U32LEB(getFunctionIndex(wasm->start.view())); finishSection(start); } @@ -339,8 +339,8 @@ void WasmBinaryWriter::writeImports() { auto start = startSection(BinaryConsts::Section::Import); o << U32LEB(num); auto writeImportHeader = [&](Importable* import) { - writeInlineString(import->module.str); - writeInlineString(import->base.str); + writeInlineString(import->module.view()); + writeInlineString(import->base.view()); }; ModuleUtils::iterImportedFunctions(*wasm, [&](Function* func) { writeImportHeader(func); @@ -581,7 +581,8 @@ void WasmBinaryWriter::writeStrings() { for (auto& string : sorted) { // Re-encode from WTF-16 to WTF-8. std::stringstream wtf8; - [[maybe_unused]] bool valid = String::convertWTF16ToWTF8(wtf8, string.str); + [[maybe_unused]] bool valid = + String::convertWTF16ToWTF8(wtf8, string.view()); assert(valid); // TODO: Use wtf8.view() once we have C++20. writeInlineString(wtf8.str()); @@ -640,7 +641,7 @@ void WasmBinaryWriter::writeExports() { auto start = startSection(BinaryConsts::Section::Export); o << U32LEB(wasm->exports.size()); for (auto& curr : wasm->exports) { - writeInlineString(curr->name.str); + writeInlineString(curr->name.view()); o << U32LEB(int32_t(curr->kind)); switch (curr->kind) { case ExternalKind::Function: @@ -917,7 +918,7 @@ void WasmBinaryWriter::writeNames() { if (emitModuleName && wasm->name.is()) { auto substart = startSubsection(BinaryConsts::CustomSections::Subsection::NameModule); - writeEscapedName(wasm->name.str); + writeEscapedName(wasm->name.view()); finishSubsection(substart); } @@ -946,7 +947,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(functionsWithNames.size()); for (auto& [index, global] : functionsWithNames) { o << U32LEB(index); - writeEscapedName(global->name.str); + writeEscapedName(global->name.view()); } finishSubsection(substart); } @@ -1006,7 +1007,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(localsWithNames.size()); for (auto& [indexInBinary, name] : localsWithNames) { o << U32LEB(indexInBinary); - writeEscapedName(name.str); + writeEscapedName(name.view()); } emitted++; } @@ -1029,7 +1030,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(namedTypes.size()); for (auto type : namedTypes) { o << U32LEB(indexedTypes.indices[type]); - writeEscapedName(wasm->typeNames[type].name.str); + writeEscapedName(wasm->typeNames[type].name.view()); } finishSubsection(substart); } @@ -1056,7 +1057,7 @@ void WasmBinaryWriter::writeNames() { for (auto& [index, table] : tablesWithNames) { o << U32LEB(index); - writeEscapedName(table->name.str); + writeEscapedName(table->name.view()); } finishSubsection(substart); @@ -1082,7 +1083,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(memoriesWithNames.size()); for (auto& [index, memory] : memoriesWithNames) { o << U32LEB(index); - writeEscapedName(memory->name.str); + writeEscapedName(memory->name.view()); } finishSubsection(substart); } @@ -1107,7 +1108,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(globalsWithNames.size()); for (auto& [index, global] : globalsWithNames) { o << U32LEB(index); - writeEscapedName(global->name.str); + writeEscapedName(global->name.view()); } finishSubsection(substart); } @@ -1132,7 +1133,7 @@ void WasmBinaryWriter::writeNames() { for (auto& [index, elem] : elemsWithNames) { o << U32LEB(index); - writeEscapedName(elem->name.str); + writeEscapedName(elem->name.view()); } finishSubsection(substart); @@ -1156,7 +1157,7 @@ void WasmBinaryWriter::writeNames() { auto& seg = wasm->dataSegments[i]; if (seg->hasExplicitName) { o << U32LEB(i); - writeEscapedName(seg->name.str); + writeEscapedName(seg->name.view()); } } finishSubsection(substart); @@ -1187,7 +1188,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(fieldNames.size()); for (auto& [index, name] : fieldNames) { o << U32LEB(index); - writeEscapedName(name.str); + writeEscapedName(name.view()); } } finishSubsection(substart); @@ -1213,7 +1214,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(tagsWithNames.size()); for (auto& [index, tag] : tagsWithNames) { o << U32LEB(index); - writeEscapedName(tag->name.str); + writeEscapedName(tag->name.view()); } finishSubsection(substart); } @@ -1232,7 +1233,8 @@ void WasmBinaryWriter::writeSourceMapUrl() { void WasmBinaryWriter::writeSymbolMap() { std::ofstream file(symbolMap); auto write = [&](Function* func) { - file << getFunctionIndex(func->name) << ":" << func->name.str << std::endl; + file << getFunctionIndex(func->name) << ":" << func->name.view() + << std::endl; }; ModuleUtils::iterImportedFunctions(*wasm, write); ModuleUtils::iterDefinedFunctions(*wasm, write); @@ -1523,7 +1525,7 @@ void WasmBinaryWriter::writeLegacyDylinkSection() { o << U32LEB(wasm->dylinkSection->tableAlignment); o << U32LEB(wasm->dylinkSection->neededDynlibs.size()); for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) { - writeInlineString(neededDynlib.str); + writeInlineString(neededDynlib.view()); } finishSection(start); } @@ -1554,7 +1556,7 @@ void WasmBinaryWriter::writeDylinkSection() { startSubsection(BinaryConsts::CustomSections::Subsection::DylinkNeeded); o << U32LEB(wasm->dylinkSection->neededDynlibs.size()); for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) { - writeInlineString(neededDynlib.str); + writeInlineString(neededDynlib.view()); } finishSubsection(substart); } @@ -1744,7 +1746,7 @@ std::optional WasmBinaryWriter::writeExpressionHints( // We found data: emit the section. buffer << uint8_t(BinaryConsts::Custom); auto lebPos = buffer.writeU32LEBPlaceholder(); - buffer.writeInlineString(sectionName.str); + buffer.writeInlineString(sectionName.view()); buffer << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { @@ -2279,7 +2281,7 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { } wasm.customSections.resize(wasm.customSections.size() + 1); auto& section = wasm.customSections.back(); - section.name = sectionName.str; + section.name = sectionName.view(); auto data = getByteView(payloadLen); section.data = {data.begin(), data.end()}; } @@ -4926,7 +4928,7 @@ void WasmBinaryReader::readStrings() { auto string = getInlineString(false); // Re-encode from WTF-8 to WTF-16. std::stringstream wtf16; - if (!String::convertWTF8ToWTF16(wtf16, string.str)) { + if (!String::convertWTF8ToWTF16(wtf16, string.view())) { throwError("invalid string constant"); } // TODO: Use wtf16.view() once we have C++20. @@ -5208,7 +5210,7 @@ static char formatNibble(int nibble) { Name WasmBinaryReader::escape(Name name) { bool allIdChars = true; - for (char c : name.str) { + for (char c : name.view()) { if (!(allIdChars = isIdChar(c))) { break; } @@ -5218,7 +5220,7 @@ Name WasmBinaryReader::escape(Name name) { } // encode name, if at least one non-idchar (per WebAssembly spec) was found std::string escaped; - for (char c : name.str) { + for (char c : name.view()) { if (isIdChar(c)) { escaped.push_back(c); continue; diff --git a/src/wasm2js.h b/src/wasm2js.h index 08ac6e6e26e..5eb937b0f88 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -126,7 +126,7 @@ bool needsBufferView(Module& wasm) { return need; } -IString stringToIString(std::string str) { return IString(str.c_str(), false); } +IString stringToIString(std::string str) { return IString(str.c_str()); } // Used when taking a wasm name and generating a JS identifier. Each scope here // is used to ensure that all names have a unique name but the same wasm name @@ -215,8 +215,7 @@ class Wasm2JSBuilder { auto index = temps[type]++; ret = IString((std::string("wasm2js_") + type.toString() + "$" + std::to_string(index)) - .c_str(), - false); + .c_str()); ret = fromName(ret, NameScope::Local); } if (func->localIndices.find(ret) == func->localIndices.end()) { @@ -427,7 +426,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) { Output out(flags.symbolsFile, wasm::Flags::Text); Index i = 0; for (auto& func : wasm->functions) { - out.getStream() << i++ << ':' << func->name.str << '\n'; + out.getStream() << i++ << ':' << func->name.view() << '\n'; } } @@ -600,7 +599,7 @@ void Wasm2JSBuilder::addBasics(Ref ast, Module* wasm) { static bool needsQuoting(Name name) { auto mangled = asmangle(name.toString()); - return mangled != name.str; + return mangled != name.view(); } void Wasm2JSBuilder::ensureModuleVar(Ref ast, const Importable& imp) { @@ -1587,7 +1586,7 @@ Ref Wasm2JSBuilder::processExpression(Expression* curr, std::ostringstream out; out << lo << "," << hi; std::string os = out.str(); - IString name(os.c_str(), false); + IString name(os.c_str()); return ValueBuilder::makeName(name); } case Type::f32: { diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 254f7a38c9f..05ce44006a6 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -19,6 +19,7 @@ set(unittest_SOURCES glbs.cpp interpreter.cpp intervals.cpp + istring.cpp json.cpp lattices.cpp local-graph.cpp diff --git a/test/gtest/istring.cpp b/test/gtest/istring.cpp new file mode 100644 index 00000000000..fc56075d8dc --- /dev/null +++ b/test/gtest/istring.cpp @@ -0,0 +1,86 @@ +// Copyright 2026 WebAssembly Community Group participants +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "support/istring.h" +#include "gtest/gtest.h" + +using namespace wasm; + +using IStringTest = ::testing::Test; + +TEST_F(IStringTest, Empty) { + // Null and empty strings differ. + auto null = IString(); + auto empty = IString(""); + EXPECT_NE(null, empty); + + // But they are equal to themselves. + EXPECT_EQ(null, null); + EXPECT_EQ(empty, empty); + + // A default string_view is a null. + auto default1 = std::string_view(); + EXPECT_EQ(default1.data(), nullptr); + auto default2 = std::string_view{}; + EXPECT_EQ(default2.data(), nullptr); + + EXPECT_EQ(null, default1); + EXPECT_EQ(null, default2); +} + +TEST_F(IStringTest, Interning) { + // The same string interned twice is equal. + auto foo1 = IString("foo"); + auto foo2 = IString("foo"); + EXPECT_EQ(foo1, foo2); + + // The internal pointers are equal too. + EXPECT_EQ(foo1.str.data(), foo2.str.data()); + + // Other things are different. + auto bar = IString("bar"); + EXPECT_NE(foo1, bar); + EXPECT_NE(foo2, bar); + + // Things are equal to themselves. + EXPECT_EQ(foo1, foo1); + EXPECT_EQ(foo2, foo2); + EXPECT_EQ(bar, bar); +} + +TEST_F(IStringTest, StartsWith) { + auto foo = IString("foo"); + EXPECT_TRUE(foo.startsWith("f")); + EXPECT_TRUE(foo.startsWith("fo")); + EXPECT_TRUE(foo.startsWith("foo")); + + EXPECT_FALSE(foo.startsWith("oo")); + EXPECT_FALSE(foo.startsWith("o")); + + EXPECT_FALSE(foo.startsWith("foobar")); + EXPECT_FALSE(foo.startsWith("bar")); +} + +TEST_F(IStringTest, EndsWith) { + auto foo = IString("foo"); + EXPECT_TRUE(foo.endsWith("o")); + EXPECT_TRUE(foo.endsWith("oo")); + EXPECT_TRUE(foo.endsWith("foo")); + + EXPECT_FALSE(foo.endsWith("f")); + EXPECT_FALSE(foo.endsWith("fo")); + + EXPECT_FALSE(foo.endsWith("foobar")); + EXPECT_FALSE(foo.endsWith("bar")); +}