Skip to content
Permalink
Browse files

Fix exponential explosion in backtrack compilation

The trick is to define * in terms of +, instead of vice versa. If you
do it the other way around, you must compile the remaining regex twice;
if that remainder uses +, it will in turn compile its remainder twice,
and so forth, resulting in 2^n compilations.
  • Loading branch information...
Peter Bex
Peter Bex committed Dec 5, 2016
1 parent 46e182c commit a16ffc86eca15fca9e40607d41de3cea9cf868f1
Showing with 20 additions and 14 deletions.
  1. +2 −0 benchmarks/re-benchmarks.txt
  2. +18 −14 irregex.scm
@@ -37,3 +37,5 @@ exponential dfa a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab

# Found by Caolan McMahon
exponential backtracking for repeating pattern [a-z]{0,42} testing@ b 1 1

Exponential memory use while building match procedure ($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($($(${-2,16}+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+) foo b 1 1
@@ -30,6 +30,8 @@

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; History
;; 0.9.6: 2016/12/05 - fixed exponential memory use of + in compilation
;; of backtracking matcher.
;; 0.9.5: 2016/09/10 - fixed a bug in irregex-fold handling of bow
;; 0.9.4: 2015/12/14 - performance improvement for {n,m} matches
;; 0.9.3: 2014/07/01 - R7RS library
@@ -3084,16 +3086,7 @@
((sre-empty? (sre-sequence (cdr sre)))
(error "invalid sre: empty *" sre))
(else
(letrec
((body
(lp (sre-sequence (cdr sre))
n
flags
(lambda (cnk init src str i end matches fail)
(body cnk init src str i end matches
(lambda ()
(next cnk init src str i end matches fail)
))))))
(let ((body (rec (list '+ (sre-sequence (cdr sre))))))
(lambda (cnk init src str i end matches fail)
(body cnk init src str i end matches
(lambda ()
@@ -3118,10 +3111,21 @@
(lambda ()
(body cnk init src str i end matches fail))))))))
((+)
(lp (sre-sequence (cdr sre))
n
flags
(rec (list '* (sre-sequence (cdr sre))))))
(cond
((sre-empty? (sre-sequence (cdr sre)))
(error "invalid sre: empty +" sre))
(else
(letrec
((body
(lp (sre-sequence (cdr sre))
n
flags
(lambda (cnk init src str i end matches fail)
(body cnk init src str i end matches
(lambda ()
(next cnk init src str i end matches fail)
))))))
body))))
((=)
(rec `(** ,(cadr sre) ,(cadr sre) ,@(cddr sre))))
((>=)

0 comments on commit a16ffc8

Please sign in to comment.
You can’t perform that action at this time.