Permalink
Browse files

REx engine improvements

	Message-Id: <199905250642.CAA06208@monk.mps.ohio-state.edu>

p4raw-id: //depot/perl@3475
  • Loading branch information...
1 parent a99f882 · commit cf93c79d660ae36ccc5f83d949c599473fc522ce · Ilya Zakharevich committed with Gurusamy Sarathy on May 25, 1999
Showing with 758 additions and 320 deletions.
  1. +6 −0 embedvar.h
  2. +27 −21 mg.c
  3. +4 −0 objXSUB.h
  4. +11 −10 pp.c
  5. +15 −14 pp_ctl.c
  6. +81 −53 pp_hot.c
  7. +19 −18 regcomp.c
  8. +110 −81 regexec.c
  9. +29 −13 regexp.h
  10. +15 −1 t/op/pat.t
  11. +197 −4 t/op/re_tests
  12. +8 −1 t/op/regexp.t
  13. +4 −2 thrdvar.h
  14. +232 −102 util.c
View
6 embedvar.h
@@ -62,6 +62,8 @@
#define PL_reg_magic (PL_curinterp->Treg_magic)
#define PL_reg_oldcurpm (PL_curinterp->Treg_oldcurpm)
#define PL_reg_oldpos (PL_curinterp->Treg_oldpos)
+#define PL_reg_oldsaved (PL_curinterp->Treg_oldsaved)
+#define PL_reg_oldsavedlen (PL_curinterp->Treg_oldsavedlen)
#define PL_reg_re (PL_curinterp->Treg_re)
#define PL_reg_start_tmp (PL_curinterp->Treg_start_tmp)
#define PL_reg_start_tmpl (PL_curinterp->Treg_start_tmpl)
@@ -453,6 +455,8 @@
#define PL_Treg_magic PL_reg_magic
#define PL_Treg_oldcurpm PL_reg_oldcurpm
#define PL_Treg_oldpos PL_reg_oldpos
+#define PL_Treg_oldsaved PL_reg_oldsaved
+#define PL_Treg_oldsavedlen PL_reg_oldsavedlen
#define PL_Treg_re PL_reg_re
#define PL_Treg_start_tmp PL_reg_start_tmp
#define PL_Treg_start_tmpl PL_reg_start_tmpl
@@ -589,6 +593,8 @@
#define PL_reg_magic (thr->Treg_magic)
#define PL_reg_oldcurpm (thr->Treg_oldcurpm)
#define PL_reg_oldpos (thr->Treg_oldpos)
+#define PL_reg_oldsaved (thr->Treg_oldsaved)
+#define PL_reg_oldsavedlen (thr->Treg_oldsavedlen)
#define PL_reg_re (thr->Treg_re)
#define PL_reg_start_tmp (thr->Treg_start_tmp)
#define PL_reg_start_tmpl (thr->Treg_start_tmpl)
View
48 mg.c
@@ -341,23 +341,23 @@ magic_regdatum_get(SV *sv, MAGIC *mg)
{
dTHR;
register I32 paren;
- register char *s;
+ register I32 s;
register I32 i;
register REGEXP *rx;
- char *t;
+ I32 t;
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
paren = mg->mg_len;
if (paren < 0)
return 0;
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s = rx->startp[paren]) != -1 &&
+ (t = rx->endp[paren]) != -1)
{
if (mg->mg_obj) /* @+ */
- i = t - rx->subbeg;
+ i = t;
else /* @- */
- i = s - rx->subbeg;
+ i = s;
sv_setiv(sv,i);
}
}
@@ -378,13 +378,15 @@ magic_len(SV *sv, MAGIC *mg)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
+ I32 s1, t1;
+
paren = atoi(mg->mg_ptr);
getparen:
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
{
- i = t - s;
+ i = t1 - s1;
if (i >= 0)
return i;
}
@@ -399,17 +401,17 @@ magic_len(SV *sv, MAGIC *mg)
return 0;
case '`':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if ((s = rx->subbeg) && rx->startp[0]) {
- i = rx->startp[0] - s;
+ if (rx->startp[0] != -1) {
+ i = rx->startp[0];
if (i >= 0)
return i;
}
}
return 0;
case '\'':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if (rx->subend && (s = rx->endp[0])) {
- i = rx->subend - s;
+ if (rx->endp[0] != -1) {
+ i = rx->sublen - rx->endp[0];
if (i >= 0)
return i;
}
@@ -589,25 +591,28 @@ magic_get(SV *sv, MAGIC *mg)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
+ I32 s1, t1;
+
/*
* Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
* XXX Does the new way break anything?
*/
paren = atoi(mg->mg_ptr);
getparen:
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
{
- i = t - s;
+ i = t1 - s1;
+ s = rx->subbeg + s1;
getrx:
if (i >= 0) {
bool was_tainted;
if (PL_tainting) {
was_tainted = PL_tainted;
PL_tainted = FALSE;
}
- sv_setpvn(sv,s,i);
+ sv_setpvn(sv, s, i);
if (PL_tainting)
PL_tainted = (was_tainted || RX_MATCH_TAINTED(rx));
break;
@@ -626,17 +631,18 @@ magic_get(SV *sv, MAGIC *mg)
break;
case '`':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if ((s = rx->subbeg) && rx->startp[0]) {
- i = rx->startp[0] - s;
+ if ((s = rx->subbeg) && rx->startp[0] != -1) {
+ i = rx->startp[0];
goto getrx;
}
}
sv_setsv(sv,&PL_sv_undef);
break;
case '\'':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if (rx->subend && (s = rx->endp[0])) {
- i = rx->subend - s;
+ if (rx->subbeg && rx->endp[0] != -1) {
+ s = rx->subbeg + rx->endp[0];
+ i = rx->sublen - rx->endp[0];
goto getrx;
}
}
View
4 objXSUB.h
@@ -518,6 +518,10 @@
#define PL_reg_oldcurpm pPerl->PL_reg_oldcurpm
#undef PL_reg_oldpos
#define PL_reg_oldpos pPerl->PL_reg_oldpos
+#undef PL_reg_oldsaved
+#define PL_reg_oldsaved pPerl->PL_reg_oldsaved
+#undef PL_reg_oldsavedlen
+#define PL_reg_oldsavedlen pPerl->PL_reg_oldsavedlen
#undef PL_reg_re
#define PL_reg_re pPerl->PL_reg_re
#undef PL_reg_start_tmp
View
21 pp.c
@@ -5006,8 +5006,10 @@ PP(pp_split)
else if (rx->check_substr && !rx->nparens
&& (rx->reganch & ROPT_CHECK_ALL)
&& !(rx->reganch & ROPT_ANCH)) {
+ int tail = SvTAIL(rx->check_substr) != 0;
+
i = SvCUR(rx->check_substr);
- if (i == 1 && !SvTAIL(rx->check_substr)) {
+ if (i == 1 && !tail) {
i = *SvPVX(rx->check_substr);
while (--limit) {
/*SUPPRESS 530*/
@@ -5026,15 +5028,15 @@ PP(pp_split)
#ifndef lint
while (s < strend && --limit &&
(m=fbm_instr((unsigned char*)s, (unsigned char*)strend,
- rx->check_substr, 0)) )
+ rx->check_substr, PL_multiline ? FBMrf_MULTILINE : 0)) )
#endif
{
dstr = NEWSV(31, m-s);
sv_setpvn(dstr, s, m-s);
if (make_mortal)
sv_2mortal(dstr);
XPUSHs(dstr);
- s = m + i;
+ s = m + i - tail; /* Fake \n at the end */
}
}
}
@@ -5044,24 +5046,23 @@ PP(pp_split)
CALLREGEXEC(rx, s, strend, orig, 1, sv, NULL, 0))
{
TAINT_IF(RX_MATCH_TAINTED(rx));
- if (rx->subbase
- && rx->subbase != orig) {
+ if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
m = s;
s = orig;
- orig = rx->subbase;
+ orig = rx->subbeg;
s = orig + (m - s);
strend = s + (strend - m);
}
- m = rx->startp[0];
+ m = rx->startp[0] + orig;
dstr = NEWSV(32, m-s);
sv_setpvn(dstr, s, m-s);
if (make_mortal)
sv_2mortal(dstr);
XPUSHs(dstr);
if (rx->nparens) {
for (i = 1; i <= rx->nparens; i++) {
- s = rx->startp[i];
- m = rx->endp[i];
+ s = rx->startp[i] + orig;
+ m = rx->endp[i] + orig;
if (m && s) {
dstr = NEWSV(33, m-s);
sv_setpvn(dstr, s, m-s);
@@ -5073,7 +5074,7 @@ PP(pp_split)
XPUSHs(dstr);
}
}
- s = rx->endp[0];
+ s = rx->endp[0] + orig;
}
}
View
29 pp_ctl.c
@@ -172,8 +172,8 @@ PP(pp_substcont)
if (cx->sb_once || !CALLREGEXEC(rx, s, cx->sb_strend, orig,
s == m, cx->sb_targ, NULL,
((cx->sb_rflags & REXEC_COPY_STR)
- ? REXEC_IGNOREPOS
- : (REXEC_COPY_STR|REXEC_IGNOREPOS))))
+ ? (REXEC_IGNOREPOS|REXEC_NOT_FIRST)
+ : (REXEC_COPY_STR|REXEC_IGNOREPOS|REXEC_NOT_FIRST))))
{
SV *targ = cx->sb_targ;
sv_catpvn(dstr, s, cx->sb_strend - s);
@@ -201,16 +201,16 @@ PP(pp_substcont)
RETURNOP(pm->op_next);
}
}
- if (rx->subbase && rx->subbase != orig) {
+ if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
m = s;
s = orig;
- cx->sb_orig = orig = rx->subbase;
+ cx->sb_orig = orig = rx->subbeg;
s = orig + (m - s);
cx->sb_strend = s + (cx->sb_strend - m);
}
- cx->sb_m = m = rx->startp[0];
+ cx->sb_m = m = rx->startp[0] + orig;
sv_catpvn(dstr, s, m-s);
- cx->sb_s = rx->endp[0];
+ cx->sb_s = rx->endp[0] + orig;
cx->sb_rxtainted |= RX_MATCH_TAINTED(rx);
rxres_save(&cx->sb_rxres, rx);
RETURNOP(pm->op_pmreplstart);
@@ -231,13 +231,13 @@ rxres_save(void **rsp, REGEXP *rx)
*rsp = (void*)p;
}
- *p++ = (UV)rx->subbase;
- rx->subbase = Nullch;
+ *p++ = (UV)(RX_MATCH_COPIED(rx) ? rx->subbeg : Nullch);
+ RX_MATCH_COPIED_off(rx);
*p++ = rx->nparens;
*p++ = (UV)rx->subbeg;
- *p++ = (UV)rx->subend;
+ *p++ = (UV)rx->sublen;
for (i = 0; i <= rx->nparens; ++i) {
*p++ = (UV)rx->startp[i];
*p++ = (UV)rx->endp[i];
@@ -250,17 +250,18 @@ rxres_restore(void **rsp, REGEXP *rx)
UV *p = (UV*)*rsp;
U32 i;
- Safefree(rx->subbase);
- rx->subbase = (char*)(*p);
+ if (RX_MATCH_COPIED(rx))
+ Safefree(rx->subbeg);
+ RX_MATCH_COPIED_set(rx, *p);
*p++ = 0;
rx->nparens = *p++;
rx->subbeg = (char*)(*p++);
- rx->subend = (char*)(*p++);
+ rx->sublen = (I32)(*p++);
for (i = 0; i <= rx->nparens; ++i) {
- rx->startp[i] = (char*)(*p++);
- rx->endp[i] = (char*)(*p++);
+ rx->startp[i] = (I32)(*p++);
+ rx->endp[i] = (I32)(*p++);
}
}
Oops, something went wrong.

0 comments on commit cf93c79

Please sign in to comment.