diff --git a/changelog.dd b/changelog.dd
index 7b910eb537b..1da6e044835 100644
--- a/changelog.dd
+++ b/changelog.dd
@@ -14,19 +14,26 @@ $(VERSION 053, ddd mm, 2011, =================================================,
         $(LI Added unsigned to std.traits)
     )
     $(LIBBUGSFIXED
+        $(LI $(BUGZILLA 4367): std.regex: Captures is not a random access range)
+        $(LI $(BUGZILLA 4574): std.regex: breaks with empty string regex)
         $(LI $(BUGZILLA 4644): assertExceptionThrown to assert that a particular exception was thrown)
         $(LI $(BUGZILLA 4944): Missing tzname even though we have tzset)
+        $(LI $(BUGZILLA 5019): In std.regex, empty capture at end of string causes error)
+        $(LI $(BUGZILLA 5169): Add(?:) Non-capturing parentheses group support to std.regex)
         $(LI $(BUGZILLA 5451): Three ideas for RedBlackTree)
         $(LI $(BUGZILLA 5474): unaryFun byRef is borked for custom parameter name)
         $(LI $(BUGZILLA 5485): TLS sections handled incorrectly in FreeBSD)
+        $(LI $(BUGZILLA 5511): std.regex optional capture with no-match cause error)
         $(LI $(BUGZILLA 5616): std.datetime: not cross-platform)
         $(LI $(BUGZILLA 5654): BigInt returns ZERO with strings of single digit number with leading zeros)
-        $(LI $(BUGZILLA 5661): std.algorithm.move does not work on elaborate struct)
+        $(LI $(BUGZILLA 5661): std.algorithm.move does not work on elaborate struct)
         $(LI $(BUGZILLA 5731): std.datetime.SysTime prints UTC offsets backwards)
         $(LI $(BUGZILLA 5761): std.datetime: Date.this(int day) conversion fails for Dec 30 of leap years)
         $(LI $(BUGZILLA 5780): [patch] std.traits.hasIndirections incorrectly handles static arrays)
         $(LI $(BUGZILLA 5781): std.datetime: On Windows, times off by one hour in some years due to DST rule changes)
         $(LI $(BUGZILLA 5794): std.datetime StopWatch (and perhaps benchmark) examples need a small fix)
+        $(LI $(BUGZILLA 5857): std.regex (...){n,m} is bogus when (...) contains repetitions)
+        $(LI $(BUGZILLA 6076): std.regex: "c.*|d" matches "mm")
     )
 )
 
diff --git a/std/regex.d b/std/regex.d
index e41b181dbc6..a96b0cef9c7 100644
--- a/std/regex.d
+++ b/std/regex.d
@@ -345,7 +345,108 @@ Returns the number of parenthesized captures
         debug(std_regex) writefln("error: %s", msg);
         throw new Exception(msg);
     }
+    //adjust relative jumps after 'change' bytes were removed right after 'place'
+    void fixup(ubyte[] prog, size_t place, uint change)
+    {
+        for (size_t pc=0;pc<prog.length;)
+        {
+            switch (prog[pc])
+            {
+            case REend:
+                return;
+
+            case REcounter: //jump forward
+                if (pc < place)
+                {
+                    auto dest = cast(uint *)&prog[pc + 1 + uint.sizeof];
+                    if (pc + *dest > place)
+                        *dest -= change;
+                }
+                pc += 1 + 2*uint.sizeof;
+                break;
+
+            case REloop, REloopg: //jump back
+                if (pc > place)
+                {
+                    auto dest = cast(uint *)&prog[pc + 1 + 2*uint.sizeof];
+                    if (pc + *dest > place)
+                        *dest += change;
+                }
+                pc += 1 + 3*uint.sizeof;
+                break;
+
+            case REneglookahead://jump or call forward
+            case RElookahead:
+            case REor:
+            case REgoto:
+                if (pc < place)
+                {
+                    auto dest = cast(uint *)&prog[pc+1];
+                    if (pc + *dest > place)
+                        *dest -= change;
+                }
+                pc += 1 + uint.sizeof;
+                break;
+
+            case REret:
+            case REanychar:
+            case REanystarg:
+            case REanystar:
+            case REbol:
+            case REeol:
+            case REwordboundary:
+            case REnotwordboundary:
+            case REdigit:
+            case REnotdigit:
+            case REspace:
+            case REnotspace:
+            case REword:
+            case REnotword:
+                pc++;
+                break;
+
+            case REchar:
+            case REichar:
+            case REbackref:
+                pc += 2;
+                break;
+
+            case REdchar:
+            case REidchar:
+                pc += 1 + dchar.sizeof;
+                break;
+
+            case REstring:
+            case REistring:
+                auto len = *cast(size_t *)&prog[pc + 1];
+                assert(len % E.sizeof == 0);
+                pc += 1 + size_t.sizeof + len;
+                break;
+
+            case REtestbit:
+            case REbit:
+            case REnotbit:
+                auto pu = cast(ushort *)&prog[pc + 1];
+                auto len = pu[1];
+                pc += 1 + 2 * ushort.sizeof + len;
+                break;
+
+            case RErange:
+            case REnotrange:
+                auto len = *cast(uint *)&prog[pc + 1];
+                pc += 1 + uint.sizeof + len;
+                break;
+
+            case REsave:
+                pc += 1 + uint.sizeof;
+                break;
+            default:
+                writefln("%d",prog[pc]);
+                assert(0);
+            }
+        }
+    }
 
     //Fixup counter numbers, simplify instructions
     private void postprocess(ubyte[] prog)
     {
@@ -353,40 +454,40 @@ Returns the number of parenthesized captures
         size_t len;
         ushort* pu;
         nCounters = 0;
-
+        size_t pc = 0;
         for (;;)
         {
-            switch (prog.front)
+            switch (prog[pc])
             {
             case REend:
                 return;
 
             case REcounter:
-                size_t offs = 1 + 2*uint.sizeof;
+                size_t offs = pc + 1 + 2*uint.sizeof;
                 bool anyloop = counter == 0 && prog[offs] == REanychar
                     && (prog[offs+1] == REloop || prog[offs+1] == REloopg);
                 uint* puint = cast(uint*)&prog[offs+2];
                 if (anyloop && puint[0] == 0 && puint[1] == inf)
                 {
-                    prog[0] = prog[offs+1] == REloop ? REanystar : REanystarg;
-                    std.array.replaceInPlace(
-                            prog, 1,
-                            2*(1 + uint.sizeof) + 1 + 3*uint.sizeof,
-                            cast(ubyte[])[]);
-                    prog.popFront();
+                    prog[pc] = prog[offs+1] == REloop ? REanystar : REanystarg;
+                    uint change = 2*(1 + uint.sizeof) + 1 + 3*uint.sizeof - 1;
+                    std.array.replaceInPlace(prog, pc + 1,
+                            pc + change + 1, cast(ubyte[])[]);
+                    fixup(prog, pc, change);
+                    pc++;
                 }
                 else
                 {
-                    *cast(uint*)&prog[1] = counter;
+                    *cast(uint *)&prog[pc+1] = counter;
                     counter++;
                     nCounters = max(nCounters, counter);
-                    prog.popFrontN(1 + 2*uint.sizeof);
+                    pc += 1 + 2*uint.sizeof;
                 }
                 break;
 
             case REloop, REloopg:
                 counter--;
-                prog.popFrontN(1 + 3*uint.sizeof);
+                pc += 1 + 3*uint.sizeof;
                 break;
 
             case REret:
@@ -401,57 +502,52 @@ Returns the number of parenthesized captures
             case REnotspace:
             case REword:
             case REnotword:
-                prog.popFront();
+                pc++;
                 break;
 
             case REbackref:
-                prog.popFrontN(2);
-                break;
-
             case REchar:
             case REichar:
-                prog.popFrontN(2);
+                pc += 2;
                 break;
 
             case REdchar:
             case REidchar:
-                prog.popFrontN(1+dchar.sizeof);
+                pc += 1 + dchar.sizeof;
                 break;
 
             case REstring:
             case REistring:
-                len = *cast(size_t *)&prog[1];
+                len = *cast(size_t *)&prog[pc+1];
                 assert(len % E.sizeof == 0);
-                prog.popFrontN(1 + size_t.sizeof + len);
+                pc += 1 + size_t.sizeof + len;
                 break;
 
             case REtestbit:
             case REbit:
             case REnotbit:
-                pu = cast(ushort *)&prog[1];
+                pu = cast(ushort *)&prog[pc+1];
                 len = pu[1];
-                prog.popFrontN(1 + 2 * ushort.sizeof + len);
+                pc += 1 + 2 * ushort.sizeof + len;
                 break;
 
             case RErange:
             case REnotrange:
-                len = *cast(uint *)&prog[1];
-                prog.popFrontN(1 + uint.sizeof + len);
+                len = *cast(uint *)&prog[pc+1];
+                pc += 1 + uint.sizeof + len;
                 break;
-
+
+            case REneglookahead:
+            case RElookahead:
             case REor:
             case REgoto:
-                prog.popFrontN(1 + uint.sizeof);
+                pc += 1 + uint.sizeof;
                 break;
 
             case REsave:
-                prog.popFrontN(1 + uint.sizeof);
+                pc += 1 + uint.sizeof;
                 break;
 
-            case REneglookahead:
-            case RElookahead:
-                prog.popFrontN(1 + uint.sizeof);
-                break;
             default:
                 assert(0);
             }
@@ -1459,7 +1555,7 @@ struct RegexMatch(Range = string)
     // Engine
     alias .Regex!(Unqual!E) Regex;
     private alias Regex.regmatch_t regmatch_t;
-    enum stackSize = 640*1024;
+    enum stackSize = 32*1024;
 /**
 Get or set the engine of the match.
 */
@@ -1965,7 +2061,7 @@ Returns $(D hit) (converted to $(D string) if necessary).
             auto stateSize = (counters.empty ? 0 : (curCounter+1)*uint.sizeof)
                 + matchesToSave*regmatch_t.sizeof;
             if (memory.length < lastState + stateSize + StateTail.sizeof)
-                memory.length += memory.length/2; //reallocates on heap
+                memory.length += memory.length; //reallocates on heap
             auto matchPtr = cast(regmatch_t*)&memory[lastState];
             matchPtr[0..matchesToSave] = pmatch[1..matchesToSave+1];
             if (!counters.empty)
@@ -2240,7 +2336,7 @@ Returns $(D hit) (converted to $(D string) if necessary).
                     src = input.length;
                 else
                 {
-                    auto p = memchr(&input[src],'\n', input.length-src);
-                    src = p ? p - &input[src] : input.length;
+                    auto p = memchr(input.ptr+src,'\n', input.length-src);
+                    src = p ? p - input.ptr : input.length;
                 }
                 while (src > ss)
@@ -3489,3 +3585,12 @@ unittest
     assert(pres == array(retro(heads)));
     assert(posts == tails);
 }
+
+//issue 6076
+//regression on .*
+unittest
+{
+    auto re = regex("c.*|d");
+    auto m = match("mm", re);
+    assert(m.empty);
+}
\ No newline at end of file