Permalink
Browse files

Merge pull request #1040 from blackwhale/regex-bug9211

Regex bugfix (issue 9211). I assume FreeBSD's failure is unrelated.
  • Loading branch information...
2 parents 901500b + 15879f4 commit efb7a7b5100ea133ef68f5aa65f9e240f20672d4 @andralex andralex committed Dec 29, 2012
Showing with 121 additions and 108 deletions.
  1. +121 −108 std/regex.d
View
@@ -576,12 +576,14 @@ static assert(Bytecode.sizeof == 4);
{
put(sink,`"`);
int i=0;
- do{
+ do
+ {
put(sink,cast(char[])([cast(dchar)irb[i].data]));
++i;
} while(i<irb.length && irb[i].code==IR.Char);
put(sink,"\"");
- if (pc<i){
+ if(pc<i)
+ {
put(sink,"\n");
for (int ii=indent+pc+1;ii>0;++ii)
put(sink,"=");
@@ -2163,7 +2165,7 @@ bool startOfLine(dchar back, bool seenNl)
}
//Test if bytecode starting at pc in program 're' can match given codepoint
-//Returns: length of matched atom if test is positive, 0 - can't tell, -1 if doesn't match
+//Returns: 0 - can't tell, -1 if doesn't match
int quickTestFwd(RegEx)(uint pc, dchar front, const ref RegEx re)
{
static assert(IRL!(IR.OrChar) == 1);//used in code processing IR.OrChar
@@ -2199,7 +2201,7 @@ int quickTestFwd(RegEx)(uint pc, dchar front, const ref RegEx re)
break;
case IR.Trie:
if(re.tries[re.ir[pc].data][front])
- return IRL!(IR.Trie);
+ return 0;
else
return -1;
default:
@@ -3042,7 +3044,8 @@ struct StreamTester(Char)
refStream=Input!(Char)(input,splits);
stream=new StreamCBuf!(Char)();
pos=0;
- if (splits.length) {
+ if (splits.length)
+ {
stream.addChunk(allStr);
stream.hasEnd=true;
}
@@ -5042,7 +5045,8 @@ enum OneShot { Fwd, Bwd };
if(!searchFn())
break;
}
- else if(!next()){
+ else if(!next())
+ {
if (!atEnd) return false;
exhausted = true;
break;
@@ -5097,7 +5101,7 @@ enum OneShot { Fwd, Bwd };
void eval(bool withInput)(Thread!DataIndex* t, Group!DataIndex[] matches)
{
ThreadList!DataIndex worklist;
- debug(fred_matching) writeln("Evaluating thread");
+ debug(fred_matching) writeln("---- Evaluating thread");
for(;;)
{
debug(fred_matching)
@@ -5312,12 +5316,7 @@ enum OneShot { Fwd, Bwd };
static if(withInput)
{
int test = quickTestFwd(pc1, front, re);
- if(test > 0)
- {
- nlist.insertBack(fork(t, pc1 + test, t.counter));
- t.pc = pc2;
- }
- else if(test == 0)
+ if(test >= 0)
{
worklist.insertFront(fork(t, pc2, t.counter));
t.pc = pc1;
@@ -5479,90 +5478,89 @@ enum OneShot { Fwd, Bwd };
case IR.Nop:
t.pc += IRL!(IR.Nop);
break;
- static if(withInput)
- {
- case IR.OrChar:
- uint len = re.ir[t.pc].sequence;
- uint end = t.pc + len;
- static assert(IRL!(IR.OrChar) == 1);
- for(; t.pc<end; t.pc++)
- if(re.ir[t.pc].data == front)
- break;
- if(t.pc != end)
- {
- t.pc = end;
- nlist.insertBack(t);
- }
- else
- recycle(t);
- t = worklist.fetch();
- if(!t)
- return;
- break;
- case IR.Char:
- if(front == re.ir[t.pc].data)
- {
- t.pc += IRL!(IR.Char);
- nlist.insertBack(t);
- }
- else
- recycle(t);
- t = worklist.fetch();
- if(!t)
- return;
- break;
- case IR.Any:
- t.pc += IRL!(IR.Any);
- if(!(re.flags & RegexOption.singleline)
- && (front == '\r' || front == '\n'))
- recycle(t);
- else
- nlist.insertBack(t);
- t = worklist.fetch();
- if(!t)
- return;
- break;
- case IR.CodepointSet:
- if(re.charsets[re.ir[t.pc].data].scanFor(front))
- {
- t.pc += IRL!(IR.CodepointSet);
- nlist.insertBack(t);
- }
- else
- {
- recycle(t);
- }
- t = worklist.fetch();
- if(!t)
- return;
- break;
- case IR.Trie:
- if(re.tries[re.ir[t.pc].data][front])
- {
- t.pc += IRL!(IR.Trie);
- nlist.insertBack(t);
- }
- else
- {
+
+ static if(withInput)
+ {
+ case IR.OrChar:
+ uint len = re.ir[t.pc].sequence;
+ uint end = t.pc + len;
+ static assert(IRL!(IR.OrChar) == 1);
+ for(; t.pc<end; t.pc++)
+ if(re.ir[t.pc].data == front)
+ break;
+ if(t.pc != end)
+ {
+ t.pc = end;
+ nlist.insertBack(t);
+ }
+ else
+ recycle(t);
+ t = worklist.fetch();
+ if(!t)
+ return;
+ break;
+ case IR.Char:
+ if(front == re.ir[t.pc].data)
+ {
+ t.pc += IRL!(IR.Char);
+ nlist.insertBack(t);
+ }
+ else
+ recycle(t);
+ t = worklist.fetch();
+ if(!t)
+ return;
+ break;
+ case IR.Any:
+ t.pc += IRL!(IR.Any);
+ if(!(re.flags & RegexOption.singleline)
+ && (front == '\r' || front == '\n'))
+ recycle(t);
+ else
+ nlist.insertBack(t);
+ t = worklist.fetch();
+ if(!t)
+ return;
+ break;
+ case IR.CodepointSet:
+ if(re.charsets[re.ir[t.pc].data].scanFor(front))
+ {
+ t.pc += IRL!(IR.CodepointSet);
+ nlist.insertBack(t);
+ }
+ else
+ {
+ recycle(t);
+ }
+ t = worklist.fetch();
+ if(!t)
+ return;
+ break;
+ case IR.Trie:
+ if(re.tries[re.ir[t.pc].data][front])
+ {
+ t.pc += IRL!(IR.Trie);
+ nlist.insertBack(t);
+ }
+ else
+ {
+ recycle(t);
+ }
+ t = worklist.fetch();
+ if(!t)
+ return;
+ break;
+ default:
+ assert(0, "Unrecognized instruction " ~ re.ir[t.pc].mnemonic);
+ }
+ else
+ {
+ default:
recycle(t);
- }
- t = worklist.fetch();
- if(!t)
- return;
- break;
- default:
- assert(0, "Unrecognized instruction " ~ re.ir[t.pc].mnemonic);
- }
- else
- {
- default:
- recycle(t);
- t = worklist.fetch();
- if(t)
- break;
- else
- return;
- }
+ t = worklist.fetch();
+ if(!t)
+ return;
+ }
}
}
@@ -5588,26 +5586,28 @@ enum OneShot { Fwd, Bwd };
startPc = cast(uint)re.ir.length-IRL!(IR.LookbehindEnd);
if(!atEnd)//if no char
{
- if (startPc!=RestartPc){
+ debug(fred_matching)
+ {
+ static if(direction == OneShot.Fwd)
+ writefln("-- Threaded matching (forward) threads at %s", s[index..s.lastIndex]);
+ else
+ writefln("-- Threaded matching (backward) threads at %s", retro(s[index..s.lastIndex]));
+ }
+ if(startPc!=RestartPc)
+ {
auto startT = createStart(index, startPc);
genCounter++;
evalFn!true(startT, matches);
}
for(;;)
{
- genCounter++;
+ debug(fred_matching) writeln("\n-- Started iteration of main cycle");
+ genCounter++;
debug(fred_matching)
{
- static if(direction == OneShot.Fwd)
- writefln("Threaded matching (forward) threads at %s", s[index..s.lastIndex]);
- else
- writefln("Threaded matching (backward) threads at %s", retro(s[index..s.lastIndex]));
foreach(t; clist[])
{
assert(t);
- writef("pc=%s ",t.pc);
- write(t.matches);
- writeln();
}
}
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
@@ -5621,14 +5621,16 @@ enum OneShot { Fwd, Bwd };
}
clist = nlist;
nlist = (ThreadList!DataIndex).init;
- if(!next()){
+ if(!next())
+ {
if (!atEnd) return MatchResult.PartialMatch;
break;
}
+ debug(fred_matching) writeln("-- Ended iteration of main cycle\n");
}
}
genCounter++; //increment also on each end
- debug(fred_matching) writefln("Threaded matching (%s) threads at end",
+ debug(fred_matching) writefln("-- Threaded matching (%s) threads at end",
direction == OneShot.Fwd ? "forward" : "backward");
//try out all zero-width posibilities
for(Thread!DataIndex* t = clist.fetch(); t; t = clist.fetch())
@@ -5647,7 +5649,7 @@ enum OneShot { Fwd, Bwd };
void evalBack(bool withInput)(Thread!DataIndex* t, Group!DataIndex[] matches)
{
ThreadList!DataIndex worklist;
- debug(fred_matching) writeln("Evaluating thread backwards");
+ debug(fred_matching) writeln("---- Evaluating thread backwards");
do
{
debug(fred_matching)
@@ -7629,6 +7631,17 @@ else
//note that the regex pattern itself is probably bogus
assert(match(r"\>wgEncode-blah-Tfbs.narrow</a>", peakRegex));
}
+
+ // bugzilla 9211
+ unittest
+ {
+ auto rx_1 = regex(r"^(\w)*(\d)");
+ auto m = match("1234", rx_1);
+ assert(equal(m.front, ["1234", "3", "4"]));
+ auto rx_2 = regex(r"^([0-9])*(\d)");
+ auto m2 = match("1234", rx_2);
+ assert(equal(m2.front, ["1234", "3", "4"]));
+ }
}
}//version(unittest)

0 comments on commit efb7a7b

Please sign in to comment.