From add64beb5e79b61c10a0f86e4c5a27ec6ae48fc0 Mon Sep 17 00:00:00 2001 From: "Richard (Rikki) Andrew Cattermole" Date: Thu, 2 Apr 2026 11:44:12 +1300 Subject: [PATCH] Improve std.uni:toCase performance --- std/uni/package.d | 77 ++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/std/uni/package.d b/std/uni/package.d index d689f08e538..d09b39ccf9c 100644 --- a/std/uni/package.d +++ b/std/uni/package.d @@ -9273,54 +9273,61 @@ if (isSomeString!S || (isRandomAccessRange!S && hasLength!S && hasSlicing!S && i { import std.array : appender, array; import std.ascii : isASCII; - import std.utf : byDchar, codeLength; + import std.utf : byDchar, codeLength, encode, decode; + import std.traits : Unqual; alias C = ElementEncodingType!S; + alias C2 = Unqual!C; - auto r = s.byDchar; - for (size_t i; !r.empty; i += r.front.codeLength!C , r.popFront()) + auto result = appender!(C[])(); + result.reserve(s.length); + + void put(dchar c) @trusted { + static if (is(C2 == dchar)) + { + result ~= c; + } + else + { + C2[4 / C.sizeof] buf; + const len = encode(buf, c); + result ~= cast(C[]) buf[0 .. len]; + } + } + + for (size_t i; i < s.length;) { - auto cOuter = r.front; - ushort idx = indexFn(cOuter); - if (idx == ushort.max) - continue; - auto result = appender!(C[])(); - result.reserve(s.length); - result.put(s[0 .. i]); - foreach (dchar c; s[i .. $].byDchar) + if (s[i].isASCII) + { + result ~= cast(C) asciiConvert(s[i]); + i++; + } + else { - if (c.isASCII) + dchar c = decode(s, i); + auto idx = indexFn(c); + if (idx == ushort.max) + put(c); // not present + else if (idx < maxIdx) { - result.put(asciiConvert(c)); + c = tableFn(idx); + put(c); } else { - idx = indexFn(c); - if (idx == ushort.max) - result.put(c); - else if (idx < maxIdx) - { - c = tableFn(idx); - result.put(c); - } - else - { - auto val = tableFn(idx); - // unpack length + codepoint - immutable uint len = val >> 24; - result.put(cast(dchar)(val & 0xFF_FFFF)); - foreach (j; idx+1 .. idx+len) - result.put(tableFn(j)); - } + auto val = tableFn(idx); + // unpack length + codepoint + immutable uint len = val >> 24; + put(cast(dchar) (val & 0xFF_FFFF)); + foreach (j; idx+1 .. idx+len) + put(tableFn(j)); } } - return result.data; } - static if (isSomeString!S) - return s; - else - return s.array; + // Don't do anything clever if we might not convert. + // It is literally slower doing the lookup than duplicating memory. + return result.data; } // https://issues.dlang.org/show_bug.cgi?id=12428