186 changes: 88 additions & 98 deletions std/uni.d
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,8 @@ unittest

size_t spaceFor(size_t _bits)(size_t new_len) @safe pure nothrow @nogc
{
enum bits = _bits == 1 ? 1 : ceilPowerOf2(_bits);// see PackedArrayView
import std.math : nextPow2;
enum bits = _bits == 1 ? 1 : nextPow2(_bits - 1);// see PackedArrayView
static if (bits > 8*size_t.sizeof)
{
static assert(bits % (size_t.sizeof*8) == 0);
Expand All @@ -1120,17 +1121,19 @@ template PackedArrayView(T)
if ((is(T dummy == BitPacked!(U, sz), U, size_t sz)
&& isBitPackableType!U) || isBitPackableType!T)
{
import std.math : nextPow2;
private enum bits = bitSizeOf!T;
alias PackedArrayView = PackedArrayViewImpl!(T, bits > 1 ? ceilPowerOf2(bits) : 1);
alias PackedArrayView = PackedArrayViewImpl!(T, bits > 1 ? nextPow2(bits - 1) : 1);
}

//unsafe and fast access to a chunk of RAM as if it contains packed values
template PackedPtr(T)
if ((is(T dummy == BitPacked!(U, sz), U, size_t sz)
&& isBitPackableType!U) || isBitPackableType!T)
{
import std.math : nextPow2;
private enum bits = bitSizeOf!T;
alias PackedPtr = PackedPtrImpl!(T, bits > 1 ? ceilPowerOf2(bits) : 1);
alias PackedPtr = PackedPtrImpl!(T, bits > 1 ? nextPow2(bits - 1) : 1);
}

@trusted struct PackedPtrImpl(T, size_t bits)
Expand Down Expand Up @@ -1601,36 +1604,22 @@ size_t switchUniformLowerBound(alias pred, Range, T)(Range range, T needle)
return idx;
}

//
size_t floorPowerOf2(size_t arg) @safe pure nothrow @nogc
{
import core.bitop : bsr;
assert(arg > 1); // else bsr is undefined
return 1<<bsr(arg-1);
}

size_t ceilPowerOf2(size_t arg) @safe pure nothrow @nogc
{
import core.bitop : bsr;
assert(arg > 1); // else bsr is undefined
return 1<<bsr(arg-1)+1;
}

template sharMethod(alias uniLowerBound)
{
size_t sharMethod(alias _pred="a<b", Range, T)(Range range, T needle)
if (is(T : ElementType!Range))
{
import std.functional;
import std.functional : binaryFun;
import std.math : nextPow2, truncPow2;
alias pred = binaryFun!_pred;
if (range.length == 0)
return 0;
if (isPowerOf2(range.length))
return uniLowerBound!pred(range, needle);
size_t n = floorPowerOf2(range.length);
size_t n = truncPow2(range.length);
if (pred(range[n-1], needle))
{// search in another 2^^k area that fully covers the tail of range
size_t k = ceilPowerOf2(range.length - n + 1);
size_t k = nextPow2(range.length - n + 1);
return range.length - k + uniLowerBound!pred(range[$-k..$], needle);
}
else
Expand Down Expand Up @@ -1706,7 +1695,7 @@ unittest


// Simple storage manipulation policy
@trusted public struct GcPolicy
@trusted private struct GcPolicy
{
static T[] dup(T)(const T[] arr)
{
Expand Down Expand Up @@ -1820,6 +1809,8 @@ alias _RealArray = CowArray!ReallocPolicy;

unittest
{
import std.algorithm.comparison : equal;

with(ReallocPolicy)
{
bool test(T, U, V)(T orig, size_t from, size_t to, U toReplace, V result,
Expand All @@ -1828,7 +1819,7 @@ unittest
{
replaceImpl(orig, from, to, toReplace);
scope(exit) destroy(orig);
if (!equalS(orig, result))
if (!equal(orig, result))
return false;
}
return true;
Expand Down Expand Up @@ -1913,23 +1904,6 @@ pure:
@property ref inout(uint) b() inout { return _tuple[1]; }
}

//@@@BUG another forward reference workaround
@trusted bool equalS(R1, R2)(R1 lhs, R2 rhs)
{
for (;;){
if (lhs.empty)
return rhs.empty;
if (rhs.empty)
return false;
if (lhs.front != rhs.front)
return false;
lhs.popFront();
rhs.popFront();
}
}



/**
$(P
$(D InversionList) is a set of $(CODEPOINTS)
Expand Down Expand Up @@ -3322,13 +3296,13 @@ private:
u24.length = 0;
assert(u24.empty);
u24.append([1, 2]);
assert(equalS(u24[], [1, 2]));
assert(equal(u24[], [1, 2]));
u24.append(111);
assert(equalS(u24[], [1, 2, 111]));
assert(equal(u24[], [1, 2, 111]));
assert(!u24_c.empty && u24_c[1] == 1024);
u24.length = 3;
copy(iota(0, 3), u24[]);
assert(equalS(u24[], iota(0, 3)));
assert(equal(u24[], iota(0, 3)));
assert(u24_c[1] == 1024);
}

Expand All @@ -3338,15 +3312,15 @@ private:
T u24_3;
u24_3 = u24_2;
assert(u24_2 == u24_3);
assert(equalS(u24[], u24_2[]));
assert(equalS(u24_2[], u24_3[]));
assert(equal(u24[], u24_2[]));
assert(equal(u24_2[], u24_3[]));
funcRef(u24_3);

assert(equalS(u24_3[], iota(0, 3)));
assert(!equalS(u24_2[], u24_3[]));
assert(equalS(u24_2[], u24[]));
assert(equal(u24_3[], iota(0, 3)));
assert(!equal(u24_2[], u24_3[]));
assert(equal(u24_2[], u24[]));
u24_2 = u24_3;
assert(equalS(u24_2[], iota(0, 3)));
assert(equal(u24_2[], iota(0, 3)));
// to test that passed arg is intact outside
// plus try out opEquals
u24 = u24_3;
Expand Down Expand Up @@ -3383,12 +3357,12 @@ private:
// set this to about 100M to stress-test COW memory management
foreach (v; 0..10_000)
func2(arr);
assert(equalS(arr[], [72, 0xFE_FEFE, 100]));
assert(equal(arr[], [72, 0xFE_FEFE, 100]));

auto r2 = U24A(iota(0, 100));
assert(equalS(r2[], iota(0, 100)), text(r2[]));
assert(equal(r2[], iota(0, 100)), text(r2[]));
copy(iota(10, 170, 2), r2[10..90]);
assert(equalS(r2[], chain(iota(0, 10), iota(10, 170, 2), iota(90, 100)))
assert(equal(r2[], chain(iota(0, 10), iota(10, 170, 2), iota(90, 100)))
, text(r2[]));
}
}
Expand Down Expand Up @@ -3634,36 +3608,38 @@ version(unittest)
{
import std.conv;
import std.typecons;
import std.algorithm.comparison : equal;

foreach (CodeList; AliasSeq!(InversionList!(ReallocPolicy)))
{
auto arr = "ABCDEFGHIJKLMabcdefghijklm"d;
auto a = CodeList('A','N','a', 'n');
assert(equalS(a.byInterval,
assert(equal(a.byInterval,
[tuple(cast(uint)'A', cast(uint)'N'), tuple(cast(uint)'a', cast(uint)'n')]
), text(a.byInterval));

// same @@@BUG as in issue 8949 ?
version(bug8949)
{
assert(equalS(retro(a.byInterval),
assert(equal(retro(a.byInterval),
[tuple(cast(uint)'a', cast(uint)'n'), tuple(cast(uint)'A', cast(uint)'N')]
), text(retro(a.byInterval)));
}
auto achr = a.byCodepoint;
assert(equalS(achr, arr), text(a.byCodepoint));
assert(equal(achr, arr), text(a.byCodepoint));
foreach (ch; a.byCodepoint)
assert(a[ch]);
auto x = CodeList(100, 500, 600, 900, 1200, 1500);
assert(equalS(x.byInterval, [ tuple(100, 500), tuple(600, 900), tuple(1200, 1500)]), text(x.byInterval));
assert(equal(x.byInterval, [ tuple(100, 500), tuple(600, 900), tuple(1200, 1500)]), text(x.byInterval));
foreach (ch; x.byCodepoint)
assert(x[ch]);
static if (is(CodeList == CodepointSet))
{
auto y = CodeList(x.byInterval);
assert(equalS(x.byInterval, y.byInterval));
assert(equal(x.byInterval, y.byInterval));
}
assert(equalS(CodepointSet.init.byInterval, cast(Tuple!(uint, uint)[])[]));
assert(equalS(CodepointSet.init.byCodepoint, cast(dchar[])[]));
assert(equal(CodepointSet.init.byInterval, cast(Tuple!(uint, uint)[])[]));
assert(equal(CodepointSet.init.byCodepoint, cast(dchar[])[]));
}
}

Expand Down Expand Up @@ -3714,7 +3690,7 @@ template mapTrieIndex(Prefix...)
See $(LREF buildTrie) for generic helpers built on top of it.
*/
@trusted struct TrieBuilder(Value, Key, Args...)
@trusted private struct TrieBuilder(Value, Key, Args...)
if (isBitPackableType!Value && isValidArgsForTrie!(Key, Args))
{
import std.exception : enforce;
Expand Down Expand Up @@ -4002,7 +3978,7 @@ public:
}
}

/*
/**
$(P A generic Trie data-structure for a fixed number of stages.
The design goal is optimal speed with smallest footprint size.
)
Expand All @@ -4012,21 +3988,21 @@ public:
)
*/
@trusted public struct Trie(Value, Key, Args...)
@trusted private struct Trie(Value, Key, Args...)
if (isValidPrefixForTrie!(Key, Args)
|| (isValidPrefixForTrie!(Key, Args[1..$])
&& is(typeof(Args[0]) : size_t)))
{
static if (is(typeof(Args[0]) : size_t))
{
enum maxIndex = Args[0];
enum hasBoundsCheck = true;
alias Prefix = Args[1..$];
private enum maxIndex = Args[0];
private enum hasBoundsCheck = true;
private alias Prefix = Args[1..$];
}
else
{
enum hasBoundsCheck = false;
alias Prefix = Args;
private enum hasBoundsCheck = false;
private alias Prefix = Args;
}

private this()(typeof(_table) table)
Expand All @@ -4041,7 +4017,7 @@ public:
_table = typeof(_table)(offsets, sizes, data);
}

/*
/**
$(P Lookup the $(D key) in this $(D Trie). )
$(P The lookup always succeeds if key fits the domain
Expand All @@ -4056,7 +4032,6 @@ public:
Domain range-checking is only enabled in debug builds
and results in assertion failure.
*/
// templated to auto-detect pure, @safe and nothrow
TypeOfBitPacked!Value opIndex()(Key key) const
{
static if (hasBoundsCheck)
Expand All @@ -4069,17 +4044,20 @@ public:
return _table.ptr!(p.length-1)[idx];
}

///
@property size_t bytes(size_t n=size_t.max)() const
{
return _table.bytes!n;
}

///
@property size_t pages(size_t n)() const
{
return (bytes!n+2^^(Prefix[n].bitSize-1))
/2^^Prefix[n].bitSize;
}

///
void store(OutRange)(scope OutRange sink) const
if (isOutputRange!(OutRange, char))
{
Expand Down Expand Up @@ -4300,8 +4278,7 @@ public template CodepointTrie(T, sizes...)
alias CodepointTrie = typeof(TrieBuilder!(T, dchar, lastDchar+1, Prefix)(T.init).build());
}

// @@@BUG multiSort can's access private symbols from uni
public template cmpK0(alias Pred)
package template cmpK0(alias Pred)
{
import std.typecons;
static bool cmpK0(Value, Key)
Expand All @@ -4311,7 +4288,7 @@ public template cmpK0(alias Pred)
}
}

/*
/**
The most general utility for construction of $(D Trie)s
short of using $(D TrieBuilder) directly.
Expand All @@ -4323,7 +4300,7 @@ public template cmpK0(alias Pred)
then the whole tuple of $(D Args) is treated as predicates
and the maximum Key is deduced from predicates.
*/
public template buildTrie(Value, Key, Args...)
private template buildTrie(Value, Key, Args...)
if (isValidArgsForTrie!(Key, Args))
{
static if (is(typeof(Args[0]) : Key)) // prefix starts with upper bound on Key
Expand Down Expand Up @@ -4522,7 +4499,7 @@ public struct MatcherConcept
assert(truth == "= 4"); // test never affects argument
}

/*
/**
Advanced feature - provide direct access to a subset of matcher based a
set of known encoding lengths. Lengths are provided in
$(S_LINK Code unit, code units). The sub-matcher then may do less
Expand Down Expand Up @@ -5757,6 +5734,8 @@ package ubyte[] compressIntervals(Range)(Range intervals)
@safe pure unittest
{
import std.typecons;
import std.algorithm.comparison : equal;

auto run = [tuple(80, 127), tuple(128, (1<<10)+128)];
ubyte[] enc = [cast(ubyte)80, 47, 1, (0b1_00<<5) | (1<<2), 0];
assert(compressIntervals(run) == enc);
Expand All @@ -5771,8 +5750,8 @@ package ubyte[] compressIntervals(Range)(Range intervals)
idx = 0;
assert(decompressFrom(enc2, idx) == 0);
assert(decompressFrom(enc2, idx) == (1<<20)+512+1);
assert(equalS(decompressIntervals(compressIntervals(run)), run));
assert(equalS(decompressIntervals(compressIntervals(run2)), run2));
assert(equal(decompressIntervals(compressIntervals(run)), run));
assert(equal(decompressIntervals(compressIntervals(run2)), run2));
}

// Creates a range of $(D CodepointInterval) that lazily decodes compressed data.
Expand Down Expand Up @@ -6379,18 +6358,20 @@ Grapheme decodeGrapheme(Input)(ref Input inp)

unittest
{
import std.algorithm.comparison : equal;

Grapheme gr;
string s = " \u0020\u0308 ";
gr = decodeGrapheme(s);
assert(gr.length == 1 && gr[0] == ' ');
gr = decodeGrapheme(s);
assert(gr.length == 2 && equalS(gr[0..2], " \u0308"));
assert(gr.length == 2 && equal(gr[0..2], " \u0308"));
s = "\u0300\u0308\u1100";
assert(equalS(decodeGrapheme(s)[], "\u0300\u0308"));
assert(equalS(decodeGrapheme(s)[], "\u1100"));
assert(equal(decodeGrapheme(s)[], "\u0300\u0308"));
assert(equal(decodeGrapheme(s)[], "\u1100"));
s = "\u11A8\u0308\uAC01";
assert(equalS(decodeGrapheme(s)[], "\u11A8\u0308"));
assert(equalS(decodeGrapheme(s)[], "\uAC01"));
assert(equal(decodeGrapheme(s)[], "\u11A8\u0308"));
assert(equal(decodeGrapheme(s)[], "\uAC01"));
}

/++
Expand Down Expand Up @@ -6609,12 +6590,14 @@ unittest
import std.exception : enforce;

public:
/// Ctor
this(C)(in C[] chars...)
if (is(C : dchar))
{
this ~= chars;
}

///ditto
this(Input)(Input seq)
if (!isDynamicArray!Input
&& isInputRange!Input && is(ElementType!Input : dchar))
Expand Down Expand Up @@ -6836,6 +6819,8 @@ static assert(Grapheme.sizeof == size_t.sizeof*4);
unittest
{
import std.algorithm : filter;
import std.algorithm.comparison : equal;

string bold = "ku\u0308hn";

// note that decodeGrapheme takes parameter by ref
Expand All @@ -6847,23 +6832,23 @@ unittest
// the next grapheme is 2 characters long
auto wideOne = decodeGrapheme(bold);
// slicing a grapheme yields a random-access range of dchar
assert(wideOne[].equalS("u\u0308"));
assert(wideOne[].equal("u\u0308"));
assert(wideOne.length == 2);
static assert(isRandomAccessRange!(typeof(wideOne[])));

// all of the usual range manipulation is possible
assert(wideOne[].filter!isMark().equalS("\u0308"));
assert(wideOne[].filter!isMark().equal("\u0308"));

auto g = Grapheme("A");
assert(g.valid);
g ~= '\u0301';
assert(g[].equalS("A\u0301"));
assert(g[].equal("A\u0301"));
assert(g.valid);
g ~= "B";
// not a valid grapheme cluster anymore
assert(!g.valid);
// still could be useful though
assert(g[].equalS("A\u0301B"));
assert(g[].equal("A\u0301B"));
}

unittest
Expand Down Expand Up @@ -6911,17 +6896,17 @@ unittest
copy[1] = '-';
assert(g[0] == 'a' && copy[0] == 'X');
assert(g[1] == 'b' && copy[1] == '-');
assert(equalS(g[2..g.length], copy[2..copy.length]));
assert(equal(g[2..g.length], copy[2..copy.length]));
copy = Grapheme("АБВГДЕЁЖЗИКЛМ");
assert(equalS(copy[0..8], "АБВГДЕЁЖ"), text(copy[0..8]));
assert(equal(copy[0..8], "АБВГДЕЁЖ"), text(copy[0..8]));
copy ~= "xyz";
assert(equalS(copy[13..15], "xy"), text(copy[13..15]));
assert(equal(copy[13..15], "xy"), text(copy[13..15]));
assert(!copy.valid);

Grapheme h;
foreach (dchar v; iota(cast(int)'A', cast(int)'Z'+1).map!"cast(dchar)a"())
h ~= v;
assert(equalS(h[], iota(cast(int)'A', cast(int)'Z'+1)));
assert(equal(h[], iota(cast(int)'A', cast(int)'Z'+1)));
}

/++
Expand Down Expand Up @@ -7255,14 +7240,14 @@ package auto simpleCaseFoldings(dchar ch)
unittest
{
import std.exception : assertCTFEable;
import std.algorithm : canFind;
import std.algorithm : canFind, equal;
import std.array;
assertCTFEable!((){
auto r = simpleCaseFoldings('Э').array;
assert(r.length == 2);
assert(r.canFind('э') && r.canFind('Э'));
auto sr = simpleCaseFoldings('~');
assert(sr.equalS("~"));
assert(sr.equal("~"));
//A with ring above - casefolds to the same bucket as Angstrom sign
sr = simpleCaseFoldings('Å');
assert(sr.length == 3);
Expand Down Expand Up @@ -7402,17 +7387,19 @@ public Grapheme decompose(UnicodeDecomposition decompType=Canonical)(dchar ch)
///
unittest
{
import std.algorithm.comparison : equal;

assert(compose('A','\u0308') == '\u00C4');
assert(compose('A', 'B') == dchar.init);
assert(compose('C', '\u0301') == '\u0106');
// note that the starter is the first one
// thus the following doesn't compose
assert(compose('\u0308', 'A') == dchar.init);

assert(decompose('Ĉ')[].equalS("C\u0302"));
assert(decompose('D')[].equalS("D"));
assert(decompose('\uD4DC')[].equalS("\u1111\u1171\u11B7"));
assert(decompose!Compatibility('¹')[].equalS("1"));
assert(decompose('Ĉ')[].equal("C\u0302"));
assert(decompose('D')[].equal("D"));
assert(decompose('\uD4DC')[].equal("\u1111\u1171\u11B7"));
assert(decompose!Compatibility('¹')[].equal("1"));
}

//----------------------------------------------------------------------------
Expand Down Expand Up @@ -7549,19 +7536,20 @@ unittest
unittest
{
import std.conv;
import std.algorithm.comparison : equal;

static void testDecomp(UnicodeDecomposition T)(dchar ch, string r)
{
Grapheme g = decompose!T(ch);
assert(equalS(g[], r), text(g[], " vs ", r));
assert(equal(g[], r), text(g[], " vs ", r));
}
testDecomp!Canonical('\u1FF4', "\u03C9\u0301\u0345");
testDecomp!Canonical('\uF907', "\u9F9C");
testDecomp!Compatibility('\u33FF', "\u0067\u0061\u006C");
testDecomp!Compatibility('\uA7F9', "\u0153");

// check examples
assert(decomposeHangul('\uD4DB')[].equalS("\u1111\u1171\u11B6"));
assert(decomposeHangul('\uD4DB')[].equal("\u1111\u1171\u11B6"));
assert(composeJamo('\u1111', '\u1171', '\u11B6') == '\uD4DB');
assert(composeJamo('\u1111', '\u1171') == '\uD4CC'); // leave out T-vowel
assert(composeJamo('\u1111', '\u1171', ' ') == '\uD4CC');
Expand Down Expand Up @@ -7916,7 +7904,7 @@ else
{

// trusted -> avoid bounds check
@trusted pure nothrow @nogc
@trusted pure nothrow @nogc private
{
// hide template instances behind functions (Bugzilla 13232)
ushort toLowerIndex(dchar c) { return toLowerIndexTrie[c]; }
Expand Down Expand Up @@ -8240,12 +8228,14 @@ auto asUpperCase(Range)(Range str)
assert("hEllo".asUpperCase.equal("HELLO"));
}

// explicitly undocumented
auto asLowerCase(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
return asLowerCase!(StringTypeOf!Range)(str);
}

// explicitly undocumented
auto asUpperCase(Range)(auto ref Range str)
if (isConvertibleToString!Range)
{
Expand Down