From d50c00582accbb92b11065bc80c41970254a18a3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:05:41 -0700 Subject: [PATCH 01/21] go --- src/support/istring.cpp | 32 ++++++++----- src/support/istring.h | 104 +++++++++++++++++++++++++++++++++------- src/support/name.h | 1 + src/wasm-interpreter.h | 3 +- 4 files changed, 110 insertions(+), 30 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 60c8b59be6d..00131f44536 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -19,7 +19,11 @@ namespace wasm { -std::string_view IString::interned(std::string_view s, bool reuse) { +const char* IString::interned(std::string_view s) { + if (s.data() == nullptr) { + return nullptr; + } + // We need a set of string_views that can be modified in-place to minimize // the number of lookups we do. Since set elements cannot normally be // modified, wrap the string_views in a container that provides mutability @@ -57,7 +61,7 @@ std::string_view IString::interned(std::string_view s, bool reuse) { auto [localIt, localInserted] = localStrings.insert(s); if (!localInserted) { // We already had a local copy of this string. - return localIt->str; + return localIt->str.data(); } // No copy yet in the local cache. Check the global cache. @@ -66,22 +70,24 @@ std::string_view IString::interned(std::string_view s, bool reuse) { if (!globalInserted) { // We already had a global copy of this string. Cache it locally. localIt->str = globalIt->str; - return localIt->str; + return localIt->str.data(); } - if (!reuse) { - // We have a new string, but it doesn't have a stable address. Create a copy - // of the data at a stable address we can use. Make sure it is null - // terminated so legacy uses that get a C string still work. - char* data = (char*)arena.allocSpace(s.size() + 1, 1); - std::copy(s.begin(), s.end(), data); - data[s.size()] = '\0'; - s = std::string_view(data, s.size()); - } + // We have a new string. Create a copy of the data at a stable address with a + // header we can use. Make sure it is null terminated so legacy uses that get + // a C string still work. + uint32_t size = s.size(); + char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, + alignof(uint32_t)); + *(uint32_t*)buffer = size; + char* data = buffer + sizeof(uint32_t); + std::copy(s.begin(), s.end(), data); + data[size] = '\0'; + s = std::string_view(data, size); // Intern our new string. localIt->str = globalIt->str = s; - return s; + return data; } } // namespace wasm diff --git a/src/support/istring.h b/src/support/istring.h index dad20b8fb00..d7383cfa1da 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -33,21 +33,84 @@ namespace wasm { struct IString { private: - static std::string_view interned(std::string_view s, bool reuse = true); + static const char* interned(std::string_view s); public: - const std::string_view str; + struct View { + const char* internal = nullptr; + const char* data() const { return internal; } + size_t size() const { + return internal ? *(const uint32_t*)(internal - 4) : 0; + } + char operator[](size_t x) const { return internal[x]; } + operator std::string_view() const { return {internal, size()}; } + + bool operator==(std::string_view other) const { + return std::string_view(*this) == other; + } + bool operator!=(std::string_view other) const { return !(*this == other); } + bool operator<(std::string_view other) const { + return std::string_view(*this) < other; + } + bool operator<=(std::string_view other) const { + return std::string_view(*this) <= other; + } + bool operator>(std::string_view other) const { + return std::string_view(*this) > other; + } + bool operator>=(std::string_view other) const { + return std::string_view(*this) >= other; + } + + friend bool operator==(const View& a, const std::string& b) { + return std::string_view(a) == b; + } + friend bool operator!=(const View& a, const std::string& b) { + return !(a == b); + } + friend bool operator==(const std::string& a, const View& b) { + return a == std::string_view(b); + } + friend bool operator!=(const std::string& a, const View& b) { + return !(a == b); + } + + friend bool operator==(const View& a, const char* b) { + return std::string_view(a) == b; + } + friend bool operator!=(const View& a, const char* b) { return !(a == b); } + friend bool operator==(const char* a, const View& b) { + return a == std::string_view(b); + } + friend bool operator!=(const char* a, const View& b) { return !(a == b); } + + std::string_view substr(size_t pos, size_t len = std::string_view::npos) const { + return std::string_view(*this).substr(pos, len); + } + size_t find(std::string_view s, size_t pos = 0) const { + return std::string_view(*this).find(s, pos); + } + size_t find_last_of(char c, size_t pos = std::string_view::npos) const { + return std::string_view(*this).find_last_of(c, pos); + } + const char* begin() const { return data(); } + const char* end() const { return data() + size(); } + + friend std::ostream& operator<<(std::ostream& os, const View& view) { + return os << std::string_view(view); + } + }; + const View str; IString() = default; - // TODO: This is a wildly unsafe default inherited from the previous - // implementation. Change it? - IString(std::string_view str, bool reuse = true) - : str(interned(str, reuse)) {} + IString(View v) : str(v) {} + + IString(std::string_view s, bool reuse = true) : str{interned(s)} {} // But other C strings generally do need to be copied. - IString(const char* str) : str(interned(str, false)) {} - IString(const std::string& str) : str(interned(str, false)) {} + IString(const char* str) : str{interned(str)} {} + IString(const std::string& str) : str{interned(str)} {} IString(const IString& other) = default; @@ -57,17 +120,24 @@ struct IString { bool operator==(const IString& other) const { // Fast! No need to compare contents due to interning - return str.data() == other.str.data(); + return str.internal == other.str.internal; } bool operator!=(const IString& other) const { return !(*this == other); } - bool operator<(const IString& other) const { return str < other.str; } - bool operator<=(const IString& other) const { return str <= other.str; } - bool operator>(const IString& other) const { return str > other.str; } - bool operator>=(const IString& other) const { return str >= other.str; } + bool operator<(const IString& other) const { + if (str.internal == other.str.internal) { + return false; + } + return std::string_view(str) < std::string_view(other.str); + } + bool operator<=(const IString& other) const { + return *this == other || *this < other; + } + bool operator>(const IString& other) const { return !(*this <= other); } + bool operator>=(const IString& other) const { return !(*this < other); } char operator[](int x) const { return str[x]; } - explicit operator bool() const { return str.data() != nullptr; } + explicit operator bool() const { return str.internal != nullptr; } // TODO: deprecate? bool is() const { return bool(*this); } @@ -81,7 +151,9 @@ struct IString { // TODO: Use C++20 `starts_with`. return str.substr(0, prefix.size()) == prefix; } - bool startsWith(IString str) const { return startsWith(str.str); } + bool startsWith(IString other) const { + return startsWith(std::string_view(other.str)); + } // Disambiguate for string literals. template bool startsWith(const char (&str)[N]) const { @@ -95,7 +167,7 @@ struct IString { } return str.substr(str.size() - suffix.size()) == suffix; } - bool endsWith(IString str) const { return endsWith(str.str); } + bool endsWith(IString other) const { return endsWith(std::string_view(other.str)); } // Disambiguate for string literals. template bool endsWith(const char (&str)[N]) const { diff --git a/src/support/name.h b/src/support/name.h index 8e3f7a291d5..a95e8ca71b0 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -36,6 +36,7 @@ struct Name : public IString { Name(std::string_view str) : IString(str, false) {} Name(const char* str) : IString(str, false) {} Name(IString str) : IString(str) {} + Name(IString::View str) : IString(str) {} Name(const std::string& str) : IString(str) {} // String literals do not need to be copied. Note: Not safe to construct from diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 8dd1bb4f4c6..7acdd6bcfc8 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -120,7 +120,8 @@ class Flow { } friend std::ostream& operator<<(std::ostream& o, const Flow& flow) { - o << "(flow " << (flow.breakTo.is() ? flow.breakTo.str : "-") << " : {"; + o << "(flow " + << (flow.breakTo.is() ? std::string_view(flow.breakTo.str) : "-") << " : {"; for (size_t i = 0; i < flow.values.size(); ++i) { if (i > 0) { o << ", "; From cb6a32f77dc2a303360484308fb56723533a8799 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:05:49 -0700 Subject: [PATCH 02/21] style --- src/support/istring.cpp | 4 ++-- src/support/istring.h | 7 +++++-- src/wasm-interpreter.h | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 00131f44536..502ac3291c3 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -77,8 +77,8 @@ const char* IString::interned(std::string_view s) { // header we can use. Make sure it is null terminated so legacy uses that get // a C string still work. uint32_t size = s.size(); - char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, - alignof(uint32_t)); + char* buffer = + (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); *(uint32_t*)buffer = size; char* data = buffer + sizeof(uint32_t); std::copy(s.begin(), s.end(), data); diff --git a/src/support/istring.h b/src/support/istring.h index d7383cfa1da..67f8cf1a56d 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -84,7 +84,8 @@ struct IString { } friend bool operator!=(const char* a, const View& b) { return !(a == b); } - std::string_view substr(size_t pos, size_t len = std::string_view::npos) const { + std::string_view substr(size_t pos, + size_t len = std::string_view::npos) const { return std::string_view(*this).substr(pos, len); } size_t find(std::string_view s, size_t pos = 0) const { @@ -167,7 +168,9 @@ struct IString { } return str.substr(str.size() - suffix.size()) == suffix; } - bool endsWith(IString other) const { return endsWith(std::string_view(other.str)); } + bool endsWith(IString other) const { + return endsWith(std::string_view(other.str)); + } // Disambiguate for string literals. template bool endsWith(const char (&str)[N]) const { diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 7acdd6bcfc8..7f64516afcb 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -121,7 +121,8 @@ class Flow { friend std::ostream& operator<<(std::ostream& o, const Flow& flow) { o << "(flow " - << (flow.breakTo.is() ? std::string_view(flow.breakTo.str) : "-") << " : {"; + << (flow.breakTo.is() ? std::string_view(flow.breakTo.str) : "-") + << " : {"; for (size_t i = 0; i < flow.values.size(); ++i) { if (i > 0) { o << ", "; From 21de09c7ae011ea7ad03ae30e2d2e1262d1278cf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:27:01 -0700 Subject: [PATCH 03/21] work --- src/support/istring.cpp | 7 +++++-- src/support/istring.h | 21 +++++++++++++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 502ac3291c3..6e0cb19f020 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -76,13 +76,16 @@ const char* IString::interned(std::string_view s) { // We have a new string. Create a copy of the data at a stable address with a // header we can use. Make sure it is null terminated so legacy uses that get // a C string still work. - uint32_t size = s.size(); + size_t size = s.size(); + // The string must fit in 32 bits, including the null terminator. + assert(size < std::numeric_limits::max()); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); - *(uint32_t*)buffer = size; + *static_cast(buffer) = size; char* data = buffer + sizeof(uint32_t); std::copy(s.begin(), s.end(), data); data[size] = '\0'; + // TODO s = std::string_view(data, size); // Intern our new string. diff --git a/src/support/istring.h b/src/support/istring.h index 67f8cf1a56d..ecc3b40ac8b 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -36,11 +36,26 @@ struct IString { static const char* interned(std::string_view s); public: + // Strings are stored in Pascal style: a size followed by the characters. We + // keep the internal pointer pointing to the data, so that data() is a no-op; + // computing the size, which is more rare, requires looking back and doing a + // load. + // + // The size is limited to 4 bytes, so the maximum string we support is 4GB-1. + // + // The alternative approach of using a string_view here, i.e., keeping the + // pointer and size in the IString, uses 4% more memory. That is, this + // optimization saves a lot of space, because while it adds 4 bytes to each + // interned string itself, we tend to have many views on each (e.g. a single + // interned name of a function, and a view in every Call of it). + // + // We provide a View here, which is a string_view-like interface that hides + // those internal details. struct View { const char* internal = nullptr; const char* data() const { return internal; } size_t size() const { - return internal ? *(const uint32_t*)(internal - 4) : 0; + return internal ? *static_cast)(internal - 4) : 0; } char operator[](size_t x) const { return internal[x]; } operator std::string_view() const { return {internal, size()}; } @@ -107,9 +122,7 @@ struct IString { IString(View v) : str(v) {} - IString(std::string_view s, bool reuse = true) : str{interned(s)} {} - - // But other C strings generally do need to be copied. + IString(std::string_view s) : str{interned(s)} {} IString(const char* str) : str{interned(str)} {} IString(const std::string& str) : str{interned(str)} {} From 042cd554e23facd7286f4dbdf7a1e20ed094cd07 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:29:53 -0700 Subject: [PATCH 04/21] bettr --- src/support/istring.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 6e0cb19f020..0fe2ddf57b8 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -29,12 +29,12 @@ const char* IString::interned(std::string_view s) { // modified, wrap the string_views in a container that provides mutability // even through a const reference. struct MutStringView { - mutable std::string_view str; - MutStringView(std::string_view str) : str(str) {} + mutable IString::View str; + MutStringView(IString::View str) : str(str) {} }; struct MutStringViewHash { size_t operator()(const MutStringView& mut) const { - return std::hash{}(mut.str); + return std::hash{}(mut.str); } }; struct MutStringViewEqual { @@ -85,11 +85,9 @@ const char* IString::interned(std::string_view s) { char* data = buffer + sizeof(uint32_t); std::copy(s.begin(), s.end(), data); data[size] = '\0'; - // TODO - s = std::string_view(data, size); // Intern our new string. - localIt->str = globalIt->str = s; + localIt->str = globalIt->str = IString::View(buffer); return data; } From 130d20beab39f94a470282733a3639834a5308da Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:37:09 -0700 Subject: [PATCH 05/21] fix --- src/support/istring.cpp | 13 ++++++++----- src/support/istring.h | 2 +- src/support/name.h | 6 +++--- src/wasm2js.h | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 0fe2ddf57b8..17dc0618615 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -29,12 +29,12 @@ const char* IString::interned(std::string_view s) { // modified, wrap the string_views in a container that provides mutability // even through a const reference. struct MutStringView { - mutable IString::View str; - MutStringView(IString::View str) : str(str) {} + mutable std::string_view str; + MutStringView(std::string_view str) : str(str) {} }; struct MutStringViewHash { size_t operator()(const MutStringView& mut) const { - return std::hash{}(mut.str); + return std::hash{}(mut.str); } }; struct MutStringViewEqual { @@ -81,13 +81,16 @@ const char* IString::interned(std::string_view s) { assert(size < std::numeric_limits::max()); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); - *static_cast(buffer) = size; + *(uint32_t*)(buffer) = size; char* data = buffer + sizeof(uint32_t); std::copy(s.begin(), s.end(), data); data[size] = '\0'; + // TODO: We store the size twice, once in the string_view and once in the + // data (right before it). A special wrapper could avoid that. + s = std::string_view(data, size); // Intern our new string. - localIt->str = globalIt->str = IString::View(buffer); + localIt->str = globalIt->str = s; return data; } diff --git a/src/support/istring.h b/src/support/istring.h index ecc3b40ac8b..2e646c29e8b 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -55,7 +55,7 @@ struct IString { const char* internal = nullptr; const char* data() const { return internal; } size_t size() const { - return internal ? *static_cast)(internal - 4) : 0; + return internal ? *(const uint32_t*)(internal - 4) : 0; } char operator[](size_t x) const { return internal[x]; } operator std::string_view() const { return {internal, size()}; } diff --git a/src/support/name.h b/src/support/name.h index a95e8ca71b0..52c20256d56 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -33,8 +33,8 @@ namespace wasm { struct Name : public IString { Name() : IString() {} - Name(std::string_view str) : IString(str, false) {} - Name(const char* str) : IString(str, false) {} + Name(std::string_view str) : IString(str) {} + Name(const char* str) : IString(str) {} Name(IString str) : IString(str) {} Name(IString::View str) : IString(str) {} Name(const std::string& str) : IString(str) {} @@ -52,7 +52,7 @@ struct Name : public IString { } static Name fromInt(size_t i) { - return IString(std::to_string(i).c_str(), false); + return IString(std::to_string(i).c_str()); } bool hasSubstring(IString substring) { diff --git a/src/wasm2js.h b/src/wasm2js.h index 735669d3d27..257d15084a3 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -126,7 +126,7 @@ bool needsBufferView(Module& wasm) { return need; } -IString stringToIString(std::string str) { return IString(str.c_str(), false); } +IString stringToIString(std::string str) { return IString(str.c_str()); } // Used when taking a wasm name and generating a JS identifier. Each scope here // is used to ensure that all names have a unique name but the same wasm name @@ -1587,7 +1587,7 @@ Ref Wasm2JSBuilder::processExpression(Expression* curr, std::ostringstream out; out << lo << "," << hi; std::string os = out.str(); - IString name(os.c_str(), false); + IString name(os.c_str()); return ValueBuilder::makeName(name); } case Type::f32: { From 7eaf41d0df3faefd1fd5981a484fbb346d13674e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:37:16 -0700 Subject: [PATCH 06/21] foemat --- src/support/name.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/support/name.h b/src/support/name.h index 52c20256d56..ed361cbf3d7 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -51,9 +51,7 @@ struct Name : public IString { } } - static Name fromInt(size_t i) { - return IString(std::to_string(i).c_str()); - } + static Name fromInt(size_t i) { return IString(std::to_string(i).c_str()); } bool hasSubstring(IString substring) { // TODO: Use C++23 `contains`. From ad6118f18b10c2fa9396e844e213dff902904487 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:40:20 -0700 Subject: [PATCH 07/21] fix --- src/wasm2js.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wasm2js.h b/src/wasm2js.h index 257d15084a3..2cb6d290610 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -215,8 +215,7 @@ class Wasm2JSBuilder { auto index = temps[type]++; ret = IString((std::string("wasm2js_") + type.toString() + "$" + std::to_string(index)) - .c_str(), - false); + .c_str()); ret = fromName(ret, NameScope::Local); } if (func->localIndices.find(ret) == func->localIndices.end()) { From fde833da6bf91c76626df01e88a78cf73c80ea75 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 08:16:56 -0700 Subject: [PATCH 08/21] more --- src/support/istring.cpp | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 17dc0618615..af9f6aa8d55 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -14,6 +14,9 @@ * limitations under the License. */ +#include +#include + #include "istring.h" #include "mixed_arena.h" @@ -24,26 +27,25 @@ const char* IString::interned(std::string_view s) { return nullptr; } - // We need a set of string_views that can be modified in-place to minimize - // the number of lookups we do. Since set elements cannot normally be - // modified, wrap the string_views in a container that provides mutability - // even through a const reference. - struct MutStringView { - mutable std::string_view str; - MutStringView(std::string_view str) : str(str) {} - }; - struct MutStringViewHash { - size_t operator()(const MutStringView& mut) const { - return std::hash{}(mut.str); + // A set of interned Views. The ones in the set are already interned, while + // we will compare it to a View constructed "manually", which is not yet + // interned. This requires us to be careful with hashing and equality, and + // make sure we do them using a string_view. + + struct InternedHash { + size_t operator()(View v) const { return std::hash{}(std::string_view(v)); } + size_t operator()(std::string_view sv) const { + return std::hash{}(sv); } }; - struct MutStringViewEqual { - bool operator()(const MutStringView& a, const MutStringView& b) const { - return a.str == b.str; - } + struct InternedEqual { + using is_transparent = void; + bool operator()(View a, View b) const { return std::string_view(a) == std::string_view(b); } + bool operator()(std::string_view a, View b) const { return a == std::string_view(b); } + bool operator()(View a, std::string_view b) const { return std::string_view(a) == b; } + bool operator()(std::string_view a, std::string_view b) const { return a == b; } }; - using StringSet = - std::unordered_set; + using StringSet = std::unordered_set; // The authoritative global set of interned string views. static StringSet globalStrings; @@ -63,7 +65,7 @@ const char* IString::interned(std::string_view s) { // We already had a local copy of this string. return localIt->str.data(); } - + // No copy yet in the local cache. Check the global cache. std::unique_lock lock(mutex); auto [globalIt, globalInserted] = globalStrings.insert(s); @@ -85,11 +87,9 @@ const char* IString::interned(std::string_view s) { char* data = buffer + sizeof(uint32_t); std::copy(s.begin(), s.end(), data); data[size] = '\0'; - // TODO: We store the size twice, once in the string_view and once in the - // data (right before it). A special wrapper could avoid that. - s = std::string_view(data, size); // Intern our new string. + View v{data}; localIt->str = globalIt->str = s; return data; } From 2552f5f9c9b4143d8fb780c604e0329c8d6daaea Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 08:20:27 -0700 Subject: [PATCH 09/21] more --- src/support/istring.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index af9f6aa8d55..8de81d1f3ec 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -33,6 +33,7 @@ const char* IString::interned(std::string_view s) { // make sure we do them using a string_view. struct InternedHash { + using is_transparent = void; size_t operator()(View v) const { return std::hash{}(std::string_view(v)); } size_t operator()(std::string_view sv) const { return std::hash{}(sv); @@ -60,19 +61,17 @@ const char* IString::interned(std::string_view s) { // A thread-local cache of strings to reduce contention. thread_local static StringSet localStrings; - auto [localIt, localInserted] = localStrings.insert(s); - if (!localInserted) { + if (auto it = localStrings.find(s); it != localStrings.end()) { // We already had a local copy of this string. - return localIt->str.data(); + return it->internal; } - + // No copy yet in the local cache. Check the global cache. std::unique_lock lock(mutex); - auto [globalIt, globalInserted] = globalStrings.insert(s); - if (!globalInserted) { + if (auto it = globalStrings.find(s); it != globalStrings.end()) { // We already had a global copy of this string. Cache it locally. - localIt->str = globalIt->str; - return localIt->str.data(); + localStrings.insert(*it); + return it->internal; } // We have a new string. Create a copy of the data at a stable address with a @@ -90,7 +89,8 @@ const char* IString::interned(std::string_view s) { // Intern our new string. View v{data}; - localIt->str = globalIt->str = s; + globalStrings.insert(v); + localStrings.insert(v); return data; } From 6b033d35ec756630915aa615e0d63b5bbf0c2ae9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:09:25 -0700 Subject: [PATCH 10/21] format --- src/support/istring.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 8de81d1f3ec..247a2be78d9 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -34,17 +34,27 @@ const char* IString::interned(std::string_view s) { struct InternedHash { using is_transparent = void; - size_t operator()(View v) const { return std::hash{}(std::string_view(v)); } + size_t operator()(View v) const { + return std::hash{}(std::string_view(v)); + } size_t operator()(std::string_view sv) const { return std::hash{}(sv); } }; struct InternedEqual { using is_transparent = void; - bool operator()(View a, View b) const { return std::string_view(a) == std::string_view(b); } - bool operator()(std::string_view a, View b) const { return a == std::string_view(b); } - bool operator()(View a, std::string_view b) const { return std::string_view(a) == b; } - bool operator()(std::string_view a, std::string_view b) const { return a == b; } + bool operator()(View a, View b) const { + return std::string_view(a) == std::string_view(b); + } + bool operator()(std::string_view a, View b) const { + return a == std::string_view(b); + } + bool operator()(View a, std::string_view b) const { + return std::string_view(a) == b; + } + bool operator()(std::string_view a, std::string_view b) const { + return a == b; + } }; using StringSet = std::unordered_set; From b1016ccdbdb01fb8fee045e1c57cb8b0bc457f83 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:39:56 -0700 Subject: [PATCH 11/21] cleanup --- src/support/istring.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 247a2be78d9..a0b11bc6d7a 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -27,11 +27,11 @@ const char* IString::interned(std::string_view s) { return nullptr; } - // A set of interned Views. The ones in the set are already interned, while - // we will compare it to a View constructed "manually", which is not yet - // interned. This requires us to be careful with hashing and equality, and - // make sure we do them using a string_view. - + // A set of interned Views, i.e., that contains our pascal-style strings. We + // need to query this using a std::string_view, as that is what we receive as + // input (turning it into pascal-style storage would add overhead). To do so, + // use overloading in the hash and equality functions (which works thanks to + // `is_transparent`). struct InternedHash { using is_transparent = void; size_t operator()(View v) const { @@ -88,8 +88,8 @@ const char* IString::interned(std::string_view s) { // header we can use. Make sure it is null terminated so legacy uses that get // a C string still work. size_t size = s.size(); - // The string must fit in 32 bits, including the null terminator. - assert(size < std::numeric_limits::max()); + // The string must fit in 32 bits, including the null terminator and size. + assert(size < uint64_t(std::numeric_limits::max()) + sizeof(uint32_t)); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); *(uint32_t*)(buffer) = size; From 303eb0e4c4f161839ebf8b0d0acf21e751f03ef4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:40:53 -0700 Subject: [PATCH 12/21] oops --- src/support/istring.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index a0b11bc6d7a..c9a1160b34f 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -89,7 +89,7 @@ const char* IString::interned(std::string_view s) { // a C string still work. size_t size = s.size(); // The string must fit in 32 bits, including the null terminator and size. - assert(size < uint64_t(std::numeric_limits::max()) + sizeof(uint32_t)); + assert(size < std::numeric_limits::max() - sizeof(uint32_t)); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); *(uint32_t*)(buffer) = size; From 11857b3a92d52bed848d17540bab9d9057798887 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:42:15 -0700 Subject: [PATCH 13/21] oops --- src/support/istring.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index c9a1160b34f..9ae7413454f 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -88,8 +88,8 @@ const char* IString::interned(std::string_view s) { // header we can use. Make sure it is null terminated so legacy uses that get // a C string still work. size_t size = s.size(); - // The string must fit in 32 bits, including the null terminator and size. - assert(size < std::numeric_limits::max() - sizeof(uint32_t)); + // The string must fit in 32 bits, including the null terminator. + assert(size < std::numeric_limits::max()); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); *(uint32_t*)(buffer) = size; From 78d8a43570e48a0057d4dddbba783aa0aa5eb278 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:43:17 -0700 Subject: [PATCH 14/21] oops --- src/support/istring.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 9ae7413454f..330bfb1900b 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -88,8 +88,8 @@ const char* IString::interned(std::string_view s) { // header we can use. Make sure it is null terminated so legacy uses that get // a C string still work. size_t size = s.size(); - // The string must fit in 32 bits, including the null terminator. - assert(size < std::numeric_limits::max()); + // The string's size must fit in 32 bits. + assert(size <= std::numeric_limits::max()); char* buffer = (char*)arena.allocSpace(sizeof(uint32_t) + size + 1, alignof(uint32_t)); *(uint32_t*)(buffer) = size; From 2982bef3aaec34d6e7305599092b6184b3ae7ab6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 30 Apr 2026 09:44:27 -0700 Subject: [PATCH 15/21] clean --- src/support/istring.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/support/istring.h b/src/support/istring.h index 2e646c29e8b..8d67d31739e 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -41,13 +41,12 @@ struct IString { // computing the size, which is more rare, requires looking back and doing a // load. // - // The size is limited to 4 bytes, so the maximum string we support is 4GB-1. + // The size is limited to 4 bytes, so the maximum string we support is 4GB. // // The alternative approach of using a string_view here, i.e., keeping the - // pointer and size in the IString, uses 4% more memory. That is, this + // pointer and size in the IString, uses more more memory. That is, this // optimization saves a lot of space, because while it adds 4 bytes to each - // interned string itself, we tend to have many views on each (e.g. a single - // interned name of a function, and a view in every Call of it). + // interned string itself, we tend to have many views on each. // // We provide a View here, which is a string_view-like interface that hides // those internal details. From 9c4f3540ed4ff389309bdc2aba86a240c15a7e61 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 4 May 2026 16:22:45 -0700 Subject: [PATCH 16/21] simplify, require users to convert to std::string_view if they want nice things --- src/passes/J2CLOpts.cpp | 2 +- src/support/istring.h | 84 ++++++----------------------------- src/support/name.cpp | 1 + src/support/name.h | 2 +- src/tools/execution-results.h | 2 +- src/tools/wasm2c-wrapper.h | 2 +- src/wasm/wasm-binary.cpp | 4 +- src/wasm2js.h | 4 +- 8 files changed, 22 insertions(+), 79 deletions(-) diff --git a/src/passes/J2CLOpts.cpp b/src/passes/J2CLOpts.cpp index 759ef7cc287..0474a31ef74 100644 --- a/src/passes/J2CLOpts.cpp +++ b/src/passes/J2CLOpts.cpp @@ -197,7 +197,7 @@ class ConstantHoister : public WalkerPass> { } Name getEnclosingClass(Name name) { - return Name(name.str.substr(name.str.find_last_of('@'))); + return Name(name.view().substr(name.view().find_last_of('@'))); } AssignmentCountMap& assignmentCounts; diff --git a/src/support/istring.h b/src/support/istring.h index 8d67d31739e..ccadcf3ed04 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -48,8 +48,8 @@ struct IString { // optimization saves a lot of space, because while it adds 4 bytes to each // interned string itself, we tend to have many views on each. // - // We provide a View here, which is a string_view-like interface that hides - // those internal details. + // We provide a View here, which is a simple interface. Users that need more + // convert to a std::string_view with .view() or a cast. struct View { const char* internal = nullptr; const char* data() const { return internal; } @@ -58,65 +58,11 @@ struct IString { } char operator[](size_t x) const { return internal[x]; } operator std::string_view() const { return {internal, size()}; } - - bool operator==(std::string_view other) const { - return std::string_view(*this) == other; - } - bool operator!=(std::string_view other) const { return !(*this == other); } - bool operator<(std::string_view other) const { - return std::string_view(*this) < other; - } - bool operator<=(std::string_view other) const { - return std::string_view(*this) <= other; - } - bool operator>(std::string_view other) const { - return std::string_view(*this) > other; - } - bool operator>=(std::string_view other) const { - return std::string_view(*this) >= other; - } - - friend bool operator==(const View& a, const std::string& b) { - return std::string_view(a) == b; - } - friend bool operator!=(const View& a, const std::string& b) { - return !(a == b); - } - friend bool operator==(const std::string& a, const View& b) { - return a == std::string_view(b); - } - friend bool operator!=(const std::string& a, const View& b) { - return !(a == b); - } - - friend bool operator==(const View& a, const char* b) { - return std::string_view(a) == b; - } - friend bool operator!=(const View& a, const char* b) { return !(a == b); } - friend bool operator==(const char* a, const View& b) { - return a == std::string_view(b); - } - friend bool operator!=(const char* a, const View& b) { return !(a == b); } - - std::string_view substr(size_t pos, - size_t len = std::string_view::npos) const { - return std::string_view(*this).substr(pos, len); - } - size_t find(std::string_view s, size_t pos = 0) const { - return std::string_view(*this).find(s, pos); - } - size_t find_last_of(char c, size_t pos = std::string_view::npos) const { - return std::string_view(*this).find_last_of(c, pos); - } - const char* begin() const { return data(); } - const char* end() const { return data() + size(); } - - friend std::ostream& operator<<(std::ostream& os, const View& view) { - return os << std::string_view(view); - } }; const View str; + std::string_view view() const { return std::string_view(str); } + IString() = default; IString(View v) : str(v) {} @@ -140,7 +86,7 @@ struct IString { if (str.internal == other.str.internal) { return false; } - return std::string_view(str) < std::string_view(other.str); + return view() < other.view(); } bool operator<=(const IString& other) const { return *this == other || *this < other; @@ -162,15 +108,13 @@ struct IString { bool startsWith(std::string_view prefix) const { // TODO: Use C++20 `starts_with`. - return str.substr(0, prefix.size()) == prefix; - } - bool startsWith(IString other) const { - return startsWith(std::string_view(other.str)); + return view().substr(0, prefix.size()) == prefix; } + bool startsWith(IString other) const { return startsWith(other.view()); } // Disambiguate for string literals. template bool startsWith(const char (&str)[N]) const { - return startsWith(std::string_view(str)); + return startsWith(view()); } bool endsWith(std::string_view suffix) const { @@ -178,19 +122,17 @@ struct IString { if (suffix.size() > str.size()) { return false; } - return str.substr(str.size() - suffix.size()) == suffix; - } - bool endsWith(IString other) const { - return endsWith(std::string_view(other.str)); + return view().substr(str.size() - suffix.size()) == suffix; } + bool endsWith(IString other) const { return endsWith(other.view()); } // Disambiguate for string literals. template bool endsWith(const char (&str)[N]) const { - return endsWith(std::string_view(str)); + return endsWith(view()); } IString substr(size_t pos, size_t len = std::string_view::npos) const { - return IString(str.substr(pos, len)); + return IString(view().substr(pos, len)); } size_t size() const { return str.size(); } @@ -207,7 +149,7 @@ template<> struct hash { }; inline std::ostream& operator<<(std::ostream& os, const wasm::IString& str) { - return os << str.str; + return os << str.view(); } } // namespace std diff --git a/src/support/name.cpp b/src/support/name.cpp index 4c599defca0..34b7546fba9 100644 --- a/src/support/name.cpp +++ b/src/support/name.cpp @@ -46,6 +46,7 @@ std::ostream& Name::print(std::ostream& o) const { // TODO: This is not spec-compliant since the spec does not yet support // quoted identifiers and has a limited set of valid idchars. o << '$'; + auto str = view(); if (size() >= 1 && std::all_of(str.begin(), str.end(), isIDChar)) { return o << str; } else { diff --git a/src/support/name.h b/src/support/name.h index ed361cbf3d7..957ae800f1e 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -55,7 +55,7 @@ struct Name : public IString { bool hasSubstring(IString substring) { // TODO: Use C++23 `contains`. - return str.find(substring.str) != std::string_view::npos; + return view().find(substring.str) != std::string_view::npos; } std::ostream& print(std::ostream& o) const; diff --git a/src/tools/execution-results.h b/src/tools/execution-results.h index 7717ec23811..09f7657ee96 100644 --- a/src/tools/execution-results.h +++ b/src/tools/execution-results.h @@ -412,7 +412,7 @@ class FuzzerImportResolver // fuzz_shell.js. Index payload = 0; for (auto name : {name.module, name.name}) { - for (auto c : name.str) { + for (auto c : name.view()) { payload = (payload + static_cast(c)) % 251; } } diff --git a/src/tools/wasm2c-wrapper.h b/src/tools/wasm2c-wrapper.h index 242442a1d8a..e39bf54a3d2 100644 --- a/src/tools/wasm2c-wrapper.h +++ b/src/tools/wasm2c-wrapper.h @@ -30,7 +30,7 @@ namespace wasm { inline std::string wasm2cMangle(Name name, Signature sig) { const char escapePrefix = 'Z'; std::string mangled = "Z_"; - for (unsigned char c : name.str) { + for (unsigned char c : name.view()) { if ((isalnum(c) && c != escapePrefix) || c == '_') { // This character is ok to emit as it is. mangled += c; diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 61d505bd205..260a6f2d383 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -5208,7 +5208,7 @@ static char formatNibble(int nibble) { Name WasmBinaryReader::escape(Name name) { bool allIdChars = true; - for (char c : name.str) { + for (char c : name.view()) { if (!(allIdChars = isIdChar(c))) { break; } @@ -5218,7 +5218,7 @@ Name WasmBinaryReader::escape(Name name) { } // encode name, if at least one non-idchar (per WebAssembly spec) was found std::string escaped; - for (char c : name.str) { + for (char c : name.view()) { if (isIdChar(c)) { escaped.push_back(c); continue; diff --git a/src/wasm2js.h b/src/wasm2js.h index 2618223629f..5eb937b0f88 100644 --- a/src/wasm2js.h +++ b/src/wasm2js.h @@ -426,7 +426,7 @@ Ref Wasm2JSBuilder::processWasm(Module* wasm, Name funcName) { Output out(flags.symbolsFile, wasm::Flags::Text); Index i = 0; for (auto& func : wasm->functions) { - out.getStream() << i++ << ':' << func->name.str << '\n'; + out.getStream() << i++ << ':' << func->name.view() << '\n'; } } @@ -599,7 +599,7 @@ void Wasm2JSBuilder::addBasics(Ref ast, Module* wasm) { static bool needsQuoting(Name name) { auto mangled = asmangle(name.toString()); - return mangled != name.str; + return mangled != name.view(); } void Wasm2JSBuilder::ensureModuleVar(Ref ast, const Importable& imp) { From 3c581c6a93245158818ffbc99e56325b07ef5736 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 4 May 2026 16:56:01 -0700 Subject: [PATCH 17/21] handle null view --- src/support/istring.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/support/istring.h b/src/support/istring.h index ccadcf3ed04..fff711a2fd6 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -57,7 +57,13 @@ struct IString { return internal ? *(const uint32_t*)(internal - 4) : 0; } char operator[](size_t x) const { return internal[x]; } - operator std::string_view() const { return {internal, size()}; } + operator std::string_view() const { + if (!internal) { + // No size to read, just return an empty view. + return std::string_view(); + } + return {internal, size()}; + } }; const View str; From 2cf2ad92bdfe9250945e3e85d5dfcd3fcffe6004 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 08:18:35 -0700 Subject: [PATCH 18/21] Add some gtesting --- test/gtest/CMakeLists.txt | 1 + test/gtest/istring.cpp | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 test/gtest/istring.cpp diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index 254f7a38c9f..05ce44006a6 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -19,6 +19,7 @@ set(unittest_SOURCES glbs.cpp interpreter.cpp intervals.cpp + istring.cpp json.cpp lattices.cpp local-graph.cpp diff --git a/test/gtest/istring.cpp b/test/gtest/istring.cpp new file mode 100644 index 00000000000..bcd0560529f --- /dev/null +++ b/test/gtest/istring.cpp @@ -0,0 +1,51 @@ +// Copyright 2026 WebAssembly Community Group participants +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "support/istring.h" +#include "gtest/gtest.h" + +using namespace wasm; + +using IStringTest = ::testing::Test; + +TEST_F(IStringTest, Empty) { + // Null and empty strings differ. + auto null = IString(); + auto empty = IString(""); + EXPECT_NE(null, empty); + + // But they are equal to themselves. + EXPECT_EQ(null, null); + EXPECT_EQ(empty, empty); +} + +TEST_F(IStringTest, Interning) { + // The same string interned twice is equal. + auto foo1 = IString("foo"); + auto foo2 = IString("foo"); + EXPECT_EQ(foo1, foo2); + + // The internal pointers are equal too. + EXPECT_EQ(foo1.str.data(), foo2.str.data()); + + // Other things are different. + auto bar = IString("bar"); + EXPECT_NE(foo1, bar); + EXPECT_NE(foo2, bar); + + // Things are equal to themselves. + EXPECT_EQ(foo1, foo1); + EXPECT_EQ(foo2, foo2); + EXPECT_EQ(bar, bar); +} From 95730dd5c51fc59ec09c6dc0144ecf6ccb77a91b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 08:40:14 -0700 Subject: [PATCH 19/21] yikes, fix startsWith/endsWith --- src/support/istring.h | 4 ++-- test/gtest/istring.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/support/istring.h b/src/support/istring.h index fff711a2fd6..8e86a035f1e 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -120,7 +120,7 @@ struct IString { // Disambiguate for string literals. template bool startsWith(const char (&str)[N]) const { - return startsWith(view()); + return startsWith(std::string_view(str)); } bool endsWith(std::string_view suffix) const { @@ -134,7 +134,7 @@ struct IString { // Disambiguate for string literals. template bool endsWith(const char (&str)[N]) const { - return endsWith(view()); + return endsWith(std::string_view(str)); } IString substr(size_t pos, size_t len = std::string_view::npos) const { diff --git a/test/gtest/istring.cpp b/test/gtest/istring.cpp index bcd0560529f..2830ecec436 100644 --- a/test/gtest/istring.cpp +++ b/test/gtest/istring.cpp @@ -49,3 +49,29 @@ TEST_F(IStringTest, Interning) { EXPECT_EQ(foo2, foo2); EXPECT_EQ(bar, bar); } + +TEST_F(IStringTest, StartsWith) { + auto foo = IString("foo"); + EXPECT_TRUE(foo.startsWith("f")); + EXPECT_TRUE(foo.startsWith("fo")); + EXPECT_TRUE(foo.startsWith("foo")); + + EXPECT_FALSE(foo.startsWith("oo")); + EXPECT_FALSE(foo.startsWith("o")); + + EXPECT_FALSE(foo.startsWith("foobar")); + EXPECT_FALSE(foo.startsWith("bar")); +} + +TEST_F(IStringTest, EndsWith) { + auto foo = IString("foo"); + EXPECT_TRUE(foo.endsWith("o")); + EXPECT_TRUE(foo.endsWith("oo")); + EXPECT_TRUE(foo.endsWith("foo")); + + EXPECT_FALSE(foo.endsWith("f")); + EXPECT_FALSE(foo.endsWith("fo")); + + EXPECT_FALSE(foo.endsWith("foobar")); + EXPECT_FALSE(foo.endsWith("bar")); +} From 313c6575d9f9442a5ca813be3302efaa5d2bb43e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 10:51:26 -0700 Subject: [PATCH 20/21] add tests --- test/gtest/istring.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/gtest/istring.cpp b/test/gtest/istring.cpp index 2830ecec436..fc56075d8dc 100644 --- a/test/gtest/istring.cpp +++ b/test/gtest/istring.cpp @@ -28,6 +28,15 @@ TEST_F(IStringTest, Empty) { // But they are equal to themselves. EXPECT_EQ(null, null); EXPECT_EQ(empty, empty); + + // A default string_view is a null. + auto default1 = std::string_view(); + EXPECT_EQ(default1.data(), nullptr); + auto default2 = std::string_view{}; + EXPECT_EQ(default2.data(), nullptr); + + EXPECT_EQ(null, default1); + EXPECT_EQ(null, default2); } TEST_F(IStringTest, Interning) { From 81c6675e62721ab8b364ec57c80e51831e2e2d55 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 13:01:20 -0700 Subject: [PATCH 21/21] avoid implicit casts --- src/ir/names.h | 2 +- src/ir/possible-contents.cpp | 3 +- src/parser/contexts.h | 2 +- src/passes/Asyncify.cpp | 2 +- src/passes/MinifyImportsAndExports.cpp | 10 +++--- src/passes/Print.cpp | 8 ++--- src/passes/StringLifting.cpp | 8 ++--- src/passes/StringLowering.cpp | 6 ++-- src/support/istring.cpp | 14 +++----- src/support/istring.h | 10 +++--- src/support/json.cpp | 2 +- src/support/name.h | 2 +- src/tools/wasm-metadce.cpp | 2 +- src/wasm-interpreter.h | 8 ++--- src/wasm/wasm-binary.cpp | 46 ++++++++++++++------------ 15 files changed, 60 insertions(+), 65 deletions(-) diff --git a/src/ir/names.h b/src/ir/names.h index 61fc137f5a8..083a54f0ef5 100644 --- a/src/ir/names.h +++ b/src/ir/names.h @@ -56,7 +56,7 @@ inline Name getValidName(Name root, if (check(root)) { return root; } - auto prefixed = std::string(root.str) + separator; + auto prefixed = std::string(root.view()) + separator; Index num = hint; while (1) { auto name = prefixed + std::to_string(num); diff --git a/src/ir/possible-contents.cpp b/src/ir/possible-contents.cpp index 3e1213cd05b..4b76f467825 100644 --- a/src/ir/possible-contents.cpp +++ b/src/ir/possible-contents.cpp @@ -1211,8 +1211,7 @@ struct InfoCollector addRoot(curr, PossibleContents::exactType(curr->type)); } void visitStringConst(StringConst* curr) { - addRoot(curr, - PossibleContents::literal(Literal(std::string(curr->string.str)))); + addRoot(curr, PossibleContents::literal(Literal(curr->string.view()))); } void visitStringMeasure(StringMeasure* curr) { // TODO: optimize when possible diff --git a/src/parser/contexts.h b/src/parser/contexts.h index eac35a543c5..06a515e32d1 100644 --- a/src/parser/contexts.h +++ b/src/parser/contexts.h @@ -1996,7 +1996,7 @@ struct ParseDefsCtx : TypeParserCtx, AnnotationParserCtx { void setSrcLoc(const std::vector& annotations) { const Annotation* annotation = nullptr; for (auto& a : annotations) { - if (a.kind.str == std::string_view("src")) { + if (a.kind.view() == std::string_view("src")) { annotation = &a; } } diff --git a/src/passes/Asyncify.cpp b/src/passes/Asyncify.cpp index a8410a24c05..feea9adf7c2 100644 --- a/src/passes/Asyncify.cpp +++ b/src/passes/Asyncify.cpp @@ -1730,7 +1730,7 @@ struct AsyncifyLocals : public WalkerPass> { } // anonymous namespace static std::string getFullImportName(Name module, Name base) { - return std::string(module.str) + '.' + base.toString(); + return module.toString() + '.' + base.toString(); } struct Asyncify : public Pass { diff --git a/src/passes/MinifyImportsAndExports.cpp b/src/passes/MinifyImportsAndExports.cpp index 882f9a8b5d9..8c43ee694c3 100644 --- a/src/passes/MinifyImportsAndExports.cpp +++ b/src/passes/MinifyImportsAndExports.cpp @@ -112,9 +112,9 @@ struct MinifyImportsAndExports : public Pass { std::cout << ','; } std::cout << "\n ["; - String::printEscaped(std::cout, key.first.str) << ", "; - String::printEscaped(std::cout, key.second.str) << ", "; - String::printEscaped(std::cout, new_.str) << "]"; + String::printEscaped(std::cout, key.first.view()) << ", "; + String::printEscaped(std::cout, key.second.view()) << ", "; + String::printEscaped(std::cout, new_.view()) << "]"; } } std::cout << "\n ],\n\"exports\": ["; @@ -127,8 +127,8 @@ struct MinifyImportsAndExports : public Pass { std::cout << ','; } std::cout << "\n ["; - String::printEscaped(std::cout, key.second.str) << ", "; - String::printEscaped(std::cout, new_.str) << "]"; + String::printEscaped(std::cout, key.second.view()) << ", "; + String::printEscaped(std::cout, new_.view()) << "]"; } } std::cout << "\n ]\n"; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 4ffdb031dd4..d22168f359d 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -2566,7 +2566,7 @@ struct PrintExpressionContents // Re-encode from WTF-16 to WTF-8. std::stringstream wtf8; [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(wtf8, curr->string.str); + String::convertWTF16ToWTF8(wtf8, curr->string.view()); assert(valid); // TODO: Use wtf8.view() once we have C++20. String::printEscaped(o, wtf8.str()); @@ -3190,7 +3190,7 @@ void PrintSExpression::visitExport(Export* curr) { o << '('; printMedium(o, "export "); std::stringstream escaped; - String::printEscaped(escaped, curr->name.str); + String::printEscaped(escaped, curr->name.view()); printText(o, escaped.str(), false) << " ("; switch (curr->kind) { case ExternalKind::Function: @@ -3219,8 +3219,8 @@ void PrintSExpression::visitExport(Export* curr) { void PrintSExpression::emitImportHeader(Importable* curr) { printMedium(o, "import "); std::stringstream escapedModule, escapedBase; - String::printEscaped(escapedModule, curr->module.str); - String::printEscaped(escapedBase, curr->base.str); + String::printEscaped(escapedModule, curr->module.view()); + String::printEscaped(escapedBase, curr->base.view()); printText(o, escapedModule.str(), false) << ' '; printText(o, escapedBase.str(), false) << ' '; } diff --git a/src/passes/StringLifting.cpp b/src/passes/StringLifting.cpp index cd3a8ffabb6..ac15d2a8614 100644 --- a/src/passes/StringLifting.cpp +++ b/src/passes/StringLifting.cpp @@ -70,11 +70,11 @@ struct StringLifting : public Pass { // Encode from WTF-8 to WTF-16. auto wtf8 = global->base; std::stringstream wtf16; - bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.str); + bool valid = String::convertWTF8ToWTF16(wtf16, wtf8.view()); if (!valid) { Fatal() << "Bad string to lift: " << wtf8; } - importedStrings[global->name] = wtf16.str(); + importedStrings[global->name] = wtf16.view(); found = true; } } @@ -101,7 +101,7 @@ struct StringLifting : public Pass { continue; } // The index in the array is the basename. - Index index = std::stoi(std::string(global->base.str)); + Index index = std::stoi(std::string(global->base.view())); if (index >= array.size()) { Fatal() << "StringLifting: bad index in string.const section"; } @@ -222,7 +222,7 @@ struct StringLifting : public Pass { auto iter = parent.importedStrings.find(curr->name); if (iter != parent.importedStrings.end()) { auto wtf16 = iter->second; - replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.str)); + replaceCurrent(Builder(*getModule()).makeStringConst(wtf16.view())); modified = true; } } diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index b4641bda134..bd753efaf91 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -153,7 +153,7 @@ struct StringGathering : public Pass { // Re-encode from WTF-16 to WTF-8 to make the name easier to read. std::stringstream wtf8; [[maybe_unused]] bool valid = - String::convertWTF16ToWTF8(wtf8, string.str); + String::convertWTF16ToWTF8(wtf8, string.view()); assert(valid); // Then escape it because identifiers must be valid UTF-8. // TODO: Use wtf8.view() and escaped.view() once we have C++20. @@ -246,7 +246,7 @@ struct StringLowering : public StringGathering { if (auto* c = global->init->dynCast()) { std::stringstream utf8; if (useMagicImports && - String::convertUTF16ToUTF8(utf8, c->string.str)) { + String::convertUTF16ToUTF8(utf8, c->string.view())) { global->module = stringConstsModule; global->base = Name(utf8.str()); } else { @@ -263,7 +263,7 @@ struct StringLowering : public StringGathering { } else { json << ','; } - String::printEscapedJSON(json, c->string.str); + String::printEscapedJSON(json, c->string.view()); jsonImportIndex++; } global->init = nullptr; diff --git a/src/support/istring.cpp b/src/support/istring.cpp index 330bfb1900b..9156ff84b03 100644 --- a/src/support/istring.cpp +++ b/src/support/istring.cpp @@ -35,7 +35,7 @@ const char* IString::interned(std::string_view s) { struct InternedHash { using is_transparent = void; size_t operator()(View v) const { - return std::hash{}(std::string_view(v)); + return std::hash{}(v.view()); } size_t operator()(std::string_view sv) const { return std::hash{}(sv); @@ -43,15 +43,9 @@ const char* IString::interned(std::string_view s) { }; struct InternedEqual { using is_transparent = void; - bool operator()(View a, View b) const { - return std::string_view(a) == std::string_view(b); - } - bool operator()(std::string_view a, View b) const { - return a == std::string_view(b); - } - bool operator()(View a, std::string_view b) const { - return std::string_view(a) == b; - } + bool operator()(View a, View b) const { return a.view() == b.view(); } + bool operator()(std::string_view a, View b) const { return a == b.view(); } + bool operator()(View a, std::string_view b) const { return a.view() == b; } bool operator()(std::string_view a, std::string_view b) const { return a == b; } diff --git a/src/support/istring.h b/src/support/istring.h index 8e86a035f1e..a567eb82f12 100644 --- a/src/support/istring.h +++ b/src/support/istring.h @@ -57,17 +57,17 @@ struct IString { return internal ? *(const uint32_t*)(internal - 4) : 0; } char operator[](size_t x) const { return internal[x]; } - operator std::string_view() const { + std::string_view view() const { if (!internal) { - // No size to read, just return an empty view. - return std::string_view(); + // No size to read. + return {}; } return {internal, size()}; } }; const View str; - std::string_view view() const { return std::string_view(str); } + std::string_view view() const { return str.view(); } IString() = default; @@ -110,7 +110,7 @@ struct IString { std::string toString() const { return {str.data(), str.size()}; } - bool equals(std::string_view other) const { return str == other; } + bool equals(std::string_view other) const { return str.view() == other; } bool startsWith(std::string_view prefix) const { // TODO: Use C++20 `starts_with`. diff --git a/src/support/json.cpp b/src/support/json.cpp index dd94719d47d..ff393317410 100644 --- a/src/support/json.cpp +++ b/src/support/json.cpp @@ -23,7 +23,7 @@ void Value::stringify(std::ostream& os, bool pretty) { if (isString()) { std::stringstream wtf16; [[maybe_unused]] bool valid = - wasm::String::convertWTF8ToWTF16(wtf16, getIString().str); + wasm::String::convertWTF8ToWTF16(wtf16, getIString().view()); assert(valid); // TODO: Use wtf16.view() once we have C++20. wasm::String::printEscapedJSON(os, wtf16.str()); diff --git a/src/support/name.h b/src/support/name.h index 957ae800f1e..d6c71f732c9 100644 --- a/src/support/name.h +++ b/src/support/name.h @@ -55,7 +55,7 @@ struct Name : public IString { bool hasSubstring(IString substring) { // TODO: Use C++23 `contains`. - return view().find(substring.str) != std::string_view::npos; + return view().find(substring.view()) != std::string_view::npos; } std::ostream& print(std::ostream& o) const; diff --git a/src/tools/wasm-metadce.cpp b/src/tools/wasm-metadce.cpp index 96b2063df4b..29bba15aaf8 100644 --- a/src/tools/wasm-metadce.cpp +++ b/src/tools/wasm-metadce.cpp @@ -78,7 +78,7 @@ struct MetaDCEGraph { // to be kept alive. module = ENV; } - return std::string(module.str) + " (*) " + std::string(base.str); + return std::string(module.view()) + " (*) " + std::string(base.view()); } ImportId getImportId(ModuleItemKind kind, Name name) { diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 12a6bf32eaf..2f122ac3ea7 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -121,9 +121,7 @@ class Flow { } friend std::ostream& operator<<(std::ostream& o, const Flow& flow) { - o << "(flow " - << (flow.breakTo.is() ? std::string_view(flow.breakTo.str) : "-") - << " : {"; + o << "(flow " << (flow.breakTo.is() ? flow.breakTo.view() : "-") << " : {"; for (size_t i = 0; i < flow.values.size(); ++i) { if (i > 0) { o << ", "; @@ -2729,7 +2727,9 @@ class ExpressionRunner : public OverriddenVisitor { return Flow(NONCONSTANT_FLOW); } } - Flow visitStringConst(StringConst* curr) { return Literal(curr->string.str); } + Flow visitStringConst(StringConst* curr) { + return Literal(curr->string.view()); + } Flow visitStringMeasure(StringMeasure* curr) { // For now we only support JS-style strings. diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index 260a6f2d383..6bcf9bd5fd2 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -207,7 +207,7 @@ void WasmBinaryWriter::writeStart() { return; } auto start = startSection(BinaryConsts::Section::Start); - o << U32LEB(getFunctionIndex(wasm->start.str)); + o << U32LEB(getFunctionIndex(wasm->start.view())); finishSection(start); } @@ -339,8 +339,8 @@ void WasmBinaryWriter::writeImports() { auto start = startSection(BinaryConsts::Section::Import); o << U32LEB(num); auto writeImportHeader = [&](Importable* import) { - writeInlineString(import->module.str); - writeInlineString(import->base.str); + writeInlineString(import->module.view()); + writeInlineString(import->base.view()); }; ModuleUtils::iterImportedFunctions(*wasm, [&](Function* func) { writeImportHeader(func); @@ -581,7 +581,8 @@ void WasmBinaryWriter::writeStrings() { for (auto& string : sorted) { // Re-encode from WTF-16 to WTF-8. std::stringstream wtf8; - [[maybe_unused]] bool valid = String::convertWTF16ToWTF8(wtf8, string.str); + [[maybe_unused]] bool valid = + String::convertWTF16ToWTF8(wtf8, string.view()); assert(valid); // TODO: Use wtf8.view() once we have C++20. writeInlineString(wtf8.str()); @@ -640,7 +641,7 @@ void WasmBinaryWriter::writeExports() { auto start = startSection(BinaryConsts::Section::Export); o << U32LEB(wasm->exports.size()); for (auto& curr : wasm->exports) { - writeInlineString(curr->name.str); + writeInlineString(curr->name.view()); o << U32LEB(int32_t(curr->kind)); switch (curr->kind) { case ExternalKind::Function: @@ -917,7 +918,7 @@ void WasmBinaryWriter::writeNames() { if (emitModuleName && wasm->name.is()) { auto substart = startSubsection(BinaryConsts::CustomSections::Subsection::NameModule); - writeEscapedName(wasm->name.str); + writeEscapedName(wasm->name.view()); finishSubsection(substart); } @@ -946,7 +947,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(functionsWithNames.size()); for (auto& [index, global] : functionsWithNames) { o << U32LEB(index); - writeEscapedName(global->name.str); + writeEscapedName(global->name.view()); } finishSubsection(substart); } @@ -1006,7 +1007,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(localsWithNames.size()); for (auto& [indexInBinary, name] : localsWithNames) { o << U32LEB(indexInBinary); - writeEscapedName(name.str); + writeEscapedName(name.view()); } emitted++; } @@ -1029,7 +1030,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(namedTypes.size()); for (auto type : namedTypes) { o << U32LEB(indexedTypes.indices[type]); - writeEscapedName(wasm->typeNames[type].name.str); + writeEscapedName(wasm->typeNames[type].name.view()); } finishSubsection(substart); } @@ -1056,7 +1057,7 @@ void WasmBinaryWriter::writeNames() { for (auto& [index, table] : tablesWithNames) { o << U32LEB(index); - writeEscapedName(table->name.str); + writeEscapedName(table->name.view()); } finishSubsection(substart); @@ -1082,7 +1083,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(memoriesWithNames.size()); for (auto& [index, memory] : memoriesWithNames) { o << U32LEB(index); - writeEscapedName(memory->name.str); + writeEscapedName(memory->name.view()); } finishSubsection(substart); } @@ -1107,7 +1108,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(globalsWithNames.size()); for (auto& [index, global] : globalsWithNames) { o << U32LEB(index); - writeEscapedName(global->name.str); + writeEscapedName(global->name.view()); } finishSubsection(substart); } @@ -1132,7 +1133,7 @@ void WasmBinaryWriter::writeNames() { for (auto& [index, elem] : elemsWithNames) { o << U32LEB(index); - writeEscapedName(elem->name.str); + writeEscapedName(elem->name.view()); } finishSubsection(substart); @@ -1156,7 +1157,7 @@ void WasmBinaryWriter::writeNames() { auto& seg = wasm->dataSegments[i]; if (seg->hasExplicitName) { o << U32LEB(i); - writeEscapedName(seg->name.str); + writeEscapedName(seg->name.view()); } } finishSubsection(substart); @@ -1187,7 +1188,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(fieldNames.size()); for (auto& [index, name] : fieldNames) { o << U32LEB(index); - writeEscapedName(name.str); + writeEscapedName(name.view()); } } finishSubsection(substart); @@ -1213,7 +1214,7 @@ void WasmBinaryWriter::writeNames() { o << U32LEB(tagsWithNames.size()); for (auto& [index, tag] : tagsWithNames) { o << U32LEB(index); - writeEscapedName(tag->name.str); + writeEscapedName(tag->name.view()); } finishSubsection(substart); } @@ -1232,7 +1233,8 @@ void WasmBinaryWriter::writeSourceMapUrl() { void WasmBinaryWriter::writeSymbolMap() { std::ofstream file(symbolMap); auto write = [&](Function* func) { - file << getFunctionIndex(func->name) << ":" << func->name.str << std::endl; + file << getFunctionIndex(func->name) << ":" << func->name.view() + << std::endl; }; ModuleUtils::iterImportedFunctions(*wasm, write); ModuleUtils::iterDefinedFunctions(*wasm, write); @@ -1523,7 +1525,7 @@ void WasmBinaryWriter::writeLegacyDylinkSection() { o << U32LEB(wasm->dylinkSection->tableAlignment); o << U32LEB(wasm->dylinkSection->neededDynlibs.size()); for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) { - writeInlineString(neededDynlib.str); + writeInlineString(neededDynlib.view()); } finishSection(start); } @@ -1554,7 +1556,7 @@ void WasmBinaryWriter::writeDylinkSection() { startSubsection(BinaryConsts::CustomSections::Subsection::DylinkNeeded); o << U32LEB(wasm->dylinkSection->neededDynlibs.size()); for (auto& neededDynlib : wasm->dylinkSection->neededDynlibs) { - writeInlineString(neededDynlib.str); + writeInlineString(neededDynlib.view()); } finishSubsection(substart); } @@ -1744,7 +1746,7 @@ std::optional WasmBinaryWriter::writeExpressionHints( // We found data: emit the section. buffer << uint8_t(BinaryConsts::Custom); auto lebPos = buffer.writeU32LEBPlaceholder(); - buffer.writeInlineString(sectionName.str); + buffer.writeInlineString(sectionName.view()); buffer << U32LEB(funcHintsVec.size()); for (auto& funcHints : funcHintsVec) { @@ -2279,7 +2281,7 @@ void WasmBinaryReader::readCustomSection(size_t payloadLen) { } wasm.customSections.resize(wasm.customSections.size() + 1); auto& section = wasm.customSections.back(); - section.name = sectionName.str; + section.name = sectionName.view(); auto data = getByteView(payloadLen); section.data = {data.begin(), data.end()}; } @@ -4926,7 +4928,7 @@ void WasmBinaryReader::readStrings() { auto string = getInlineString(false); // Re-encode from WTF-8 to WTF-16. std::stringstream wtf16; - if (!String::convertWTF8ToWTF16(wtf16, string.str)) { + if (!String::convertWTF8ToWTF16(wtf16, string.view())) { throwError("invalid string constant"); } // TODO: Use wtf16.view() once we have C++20.