@@ -1,8 +1,8 @@
/*
Kickstart is a coarse-grained "filter" engine that finds likely matches
to be verified by full-blown matcher.
ShiftOr is a kickstart engine, a coarse-grained "filter" engine that finds
potential matches to be verified by a full-blown matcher.
*/
module std.regex.internal.kickstart ;
module std.regex.internal.shiftor ;
package (std.regex ):
Expand All
@@ -26,9 +26,10 @@ uint effectiveSize(Char)()
Kickstart engine using ShiftOr algorithm,
a bit parallel technique for inexact string searching.
*/
struct ShiftOr (Char)
class ShiftOr (Char) : Kickstart !Char
{
private :
pure :
uint [] table;
uint fChar;
uint n_length;
Expand Down
Expand Up
@@ -115,8 +116,8 @@ private:
{
auto t = worklist[$- 1 ];
worklist.length -= 1 ;
if (! __ctfe)
cast (void )worklist.assumeSafeAppend ();
// if (!__ctfe)
// cast(void)worklist.assumeSafeAppend();
return t;
}
Expand All
@@ -127,13 +128,13 @@ private:
}
public :
@trusted this (ref Regex! Char re, uint [] memory )
@trusted this (ref Regex! Char re)
{
static import std.algorithm.comparison ;
import std.algorithm.searching : countUntil;
import std.conv : text;
import std.range : assumeSorted;
assert ( memory.length == 256 ) ;
uint [] memory = new uint [ 256 ] ;
fChar = uint .max;
// FNV-1a flavored hash (uses 32bits at a time)
ulong hash (uint [] tab)
Expand Down
Expand Up
@@ -241,9 +242,9 @@ public:
static immutable codeBounds = [0x0 , 0x7F , 0x80 , 0x7FF , 0x800 , 0xFFFF , 0x10000 , 0x10FFFF ];
else // == 2
static immutable codeBounds = [0x0 , 0xFFFF , 0x10000 , 0x10FFFF ];
uint [] arr = new uint [set.byInterval. length * 2 ];
uint [] arr = new uint [set.length * 2 ];
size_t ofs = 0 ;
foreach (ival; set.byInterval )
foreach (ival; set)
{
arr[ofs++ ] = ival.a;
arr[ofs++ ] = ival.b;
Expand All
@@ -262,7 +263,8 @@ public:
auto chars = set.length;
if (chars > charsetThreshold)
goto L_StopThread;
foreach (ch; set.byCodepoint)
foreach (ival; set)
foreach (ch; ival.a.. ival.b)
{
// avoid surrogate pairs
if (0xD800 <= ch && ch <= 0xDFFF )
Expand Down
Expand Up
@@ -339,25 +341,6 @@ public:
t.pc += IRL ! (IR .RepeatEnd);
}
break ;
case IR .InfiniteStart, IR .InfiniteQStart:
t.pc += re.ir[t.pc].data + IRL ! (IR .InfiniteStart);
goto case IR .InfiniteEnd; // both Q and non-Q
case IR .InfiniteEnd:
case IR .InfiniteQEnd:
auto slot = re.ir[t.pc+ 1 ].raw+ t.counter;
auto val = hash(t.tab);
if (val in merge[slot])
goto L_StopThread; // merge equivalent
merge[slot][val] = true ;
uint len = re.ir[t.pc].data;
uint pc1, pc2; // branches to take in priority order
if (++ t.hops == 32 )
goto L_StopThread;
pc1 = t.pc + IRL ! (IR .InfiniteEnd);
pc2 = t.pc - len;
trs ~= fork(t, pc2, t.counter);
t.pc = pc1;
break ;
case IR .GroupStart, IR .GroupEnd:
t.pc += IRL ! (IR .GroupStart);
break ;
Expand All
@@ -370,7 +353,6 @@ public:
default :
L_StopThread:
assert (re.ir[t.pc].code >= 0x80 , text(re.ir[t.pc].code));
debug (fred_search) writeln(" ShiftOr stumbled on " ,re.ir[t.pc].mnemonic);
n_length = std.algorithm.comparison.min (t.idx, n_length);
break L_Eval_Thread;
}
Expand All
@@ -385,22 +367,23 @@ public:
}
}
@property bool empty() const { return n_length == 0 ; }
final @property bool empty() const { return n_length < 3 && fChar == uint .max ; }
@property uint length() const { return n_length/ charSize; }
final @property uint length() const { return n_length/ charSize; }
// lookup compatible bit pattern in haystack, return starting index
// has a useful trait: if supplied with valid UTF indexes,
// returns only valid UTF indexes
// (that given the haystack in question is valid UTF string)
@trusted size_t search(const ( Char)[] haystack, size_t idx)
final @trusted bool search(ref Input ! Char s) const
{// @BUG: apparently assumes little endian machines
import std.conv : text;
import core.stdc.string : memchr;
assert (! empty);
auto p = cast ( const ( ubyte ) * )(haystack.ptr + idx) ;
auto haystack = s._origin ;
uint state = uint .max;
uint limit = 1u << (n_length - 1u );
auto p = cast (const (ubyte )* )(haystack.ptr+ s._index);
debug (std_regex_search) writefln(" Limit: %32b" ,limit);
if (fChar != uint .max)
{
Expand All
@@ -415,11 +398,17 @@ public:
assert (p <= end, text(p," vs " , end));
p = cast (ubyte * )memchr(p, fChar, end - p);
if (! p)
return haystack.length;
{
s._index = haystack.length;
return false ;
}
if ((cast (size_t )p & (Char.sizeof- 1 )) == orginalAlign)
break ;
if (++ p == end)
return haystack.length;
{
s._index = haystack.length;
return false ;
}
}
state = ~ 1u ;
assert ((cast (size_t )p & (Char.sizeof- 1 )) == orginalAlign);
Expand All
@@ -433,8 +422,10 @@ public:
p++ ;
// first char is tested, see if that's all
if (! (state & limit))
return (p- cast (ubyte * )haystack.ptr)/ Char.sizeof
- length;
{
s._index = (p- cast (ubyte * )haystack.ptr)/ Char.sizeof- length;
return true ;
}
}
else
{// have some bits/states for possible matches,
Expand All
@@ -452,8 +443,10 @@ public:
p++ ;
}
if (! (state & limit))
return (p- cast (ubyte * )haystack.ptr)/ Char.sizeof
- length;
{
s._index = (p- cast (ubyte * )haystack.ptr)/ Char.sizeof- length;
return true ;
}
}
debug (std_regex_search) writefln(" State: %32b" , state);
}
Expand All
@@ -471,8 +464,10 @@ public:
state = (state<< 1 ) | table[p[2 ]];
p += 4 ;
if (! (state & limit))// division rounds down for dchar
return (p- cast (ubyte * )haystack.ptr)/ Char.sizeof
- length;
{
s._index = (p- cast (ubyte * )haystack.ptr)/ Char.sizeof- length;
return true ;
}
}
}
else
Expand All
@@ -483,97 +478,105 @@ public:
{
state = (state<< 1 ) | table[p[i++ ]];
if (! (state & limit))
return idx+ i/ Char.sizeof- length;
{
s._index += i/ Char.sizeof- length;
return true ;
}
}
while (i < len)
{
state = (state<< 1 ) | table[p[i++ ]];
if (! (state & limit))
return idx+ i/ Char.sizeof
- length;
{
s._index += i/ Char.sizeof- length;
return true ;
}
state = (state<< 1 ) | table[p[i++ ]];
if (! (state & limit))
return idx+ i/ Char.sizeof
- length;
{
s._index += i/ Char.sizeof- length;
return true ;
}
debug (std_regex_search) writefln(" State: %32b" , state);
}
}
}
return haystack.length;
s._index = haystack.length;
return false ;
}
final @trusted bool match(ref Input! Char s) const
{
// TODO: stub
return false ;
}
@system debug static void dump(uint [] table)
{// @@@BUG@@@ writef(ln) is @system
import std.stdio : writefln;
for (size_t i = 0 ; i < table.length; i += 4 )
{
writefln(" %32b %32b %32b %32b" ,table[i], table[i+ 1 ], table[i+ 2 ], table[i+ 3 ]);
debug writefln(" %32b %32b %32b %32b" ,table[i], table[i+ 1 ], table[i+ 2 ], table[i+ 3 ]);
}
}
}
unittest
{
import std.conv , std.regex ;
@trusted void test_fixed( alias Kick)( )
auto shiftOrLength (C)( const (C)[] pat, uint length )
{
foreach (i, v; AliasSeq! (char , wchar , dchar ))
auto r = regex(pat, " s" );
auto kick = new ShiftOr! C(r);
assert (kick.length == length, text(C.stringof, " == " , kick.length));
return kick;
}
auto searches (C)(const (C)[] source, ShiftOr! C kick, uint [] results... )
{
auto inp = Input! C(source);
foreach (r; results)
{
alias Char = v;
alias String = immutable (v)[];
auto r = regex(to! String(` abc$` ));
auto kick = Kick! Char(r, new uint [256 ]);
assert (kick.length == 3 , text(Kick.stringof," " ,v.stringof, " == " , kick.length));
auto r2 = regex(to! String(` (abc){2}a+` ));
kick = Kick! Char(r2, new uint [256 ]);
assert (kick.length == 7 , text(Kick.stringof,v.stringof," == " , kick.length));
auto r3 = regex(to! String(` \b(a{2}b{3}){2,4}` ));
kick = Kick! Char(r3, new uint [256 ]);
assert (kick.length == 10 , text(Kick.stringof,v.stringof," == " , kick.length));
auto r4 = regex(to! String(` \ba{2}c\bxyz` ));
kick = Kick! Char(r4, new uint [256 ]);
assert (kick.length == 6 , text(Kick.stringof,v.stringof, " == " , kick.length));
auto r5 = regex(to! String(` \ba{2}c\b` ));
kick = Kick! Char(r5, new uint [256 ]);
size_t x = kick.search(" aabaacaa" , 0 );
assert (x == 3 , text(Kick.stringof,v.stringof," == " , kick.length));
x = kick.search(" aabaacaa" , x+ 1 );
assert (x == 8 , text(Kick.stringof,v.stringof," == " , kick.length));
kick.search(inp);
dchar ch;
size_t idx;
assert (inp._index == r, text(inp._index, " vs " , r));
inp.nextChar(ch, idx);
}
}
@trusted void test_flex(alias Kick)()
foreach (i, Char; AliasSeq! (char , wchar , dchar ))
{
foreach (i, v; AliasSeq! (char , wchar , dchar ))
{
alias Char = v;
alias String = immutable (v)[];
auto r = regex(to! String(` abc[a-z]` ));
auto kick = Kick! Char(r, new uint [256 ]);
auto x = kick.search(to! String(" abbabca" ), 0 );
assert (x == 3 , text(" real x is " , x, " " ,v.stringof));
alias String = immutable (Char)[];
shiftOrLength(` abc` .to! String, 3 );
shiftOrLength(` abc$` .to! String, 3 );
shiftOrLength(` (abc){2}a+` .to! String, 7 );
shiftOrLength(` \b(a{2}b{3}){2,4}` .to! String, 10 );
shiftOrLength(` \ba{2}c\bxyz` .to! String, 6 );
auto kick = shiftOrLength(` \ba{2}c\b` .to! String, 3 );
auto inp = Input! Char(" aabaacaa" );
assert (kick.search(inp));
assert (inp._index == 3 , text(Char.stringof," == " , kick.length));
dchar ch;
size_t idx;
inp.nextChar(ch, idx);
assert (! kick.search(inp));
assert (inp._index == 8 , text(Char.stringof," == " , kick.length));
}
auto r2 = regex(to! String(` (ax|bd|cdy)` ));
String s2 = to! String(" abdcdyabax" );
kick = Kick! Char(r2, new uint [256 ]);
x = kick.search(s2, 0 );
assert (x == 1 , text(" real x is " , x));
x = kick.search(s2, x+ 1 );
assert (x == 3 , text(" real x is " , x));
x = kick.search(s2, x+ 1 );
assert (x == 8 , text(" real x is " , x));
auto rdot = regex(to! String(` ...` ));
kick = Kick! Char(rdot, new uint [256 ]);
assert (kick.length == 0 );
auto rN = regex(to! String(` a(b+|c+)x` ));
kick = Kick! Char(rN, new uint [256 ]);
assert (kick.length == 3 , to! string (kick.length));
assert (kick.search(" ababx" ,0 ) == 2 );
assert (kick.search(" abaacba" ,0 ) == 3 );// expected inexact
foreach (i, Char; AliasSeq! (char , wchar , dchar ))
{
alias String = immutable (Char)[];
auto kick = shiftOrLength(` abc[a-z]` .to! String, 4 );
searches(" abbabca" .to! String, kick, 3 );
kick = shiftOrLength(` (axx|bdx|cdy)` .to! String, 3 );
searches(" abdcdxabax" .to! String, kick, 3 );
}
shiftOrLength(` ...` .to! String, 0 );
kick = shiftOrLength(` a(b{1,2}|c{1,2})x` .to! String, 3 );
searches(" ababx" .to! String, kick, 2 );
searches(" abaacba" .to! String, kick, 3 ); // expected inexact
}
test_fixed! (ShiftOr)();
test_flex! (ShiftOr)();
}
alias Kickstart = ShiftOr;