Skip to content

Commit

Permalink
Rewrite SEQ without using recursion.
Browse files Browse the repository at this point in the history
Allows for very long regexes to be processed.
Doesn't require the result to be reversed afterwards.
  • Loading branch information
stassats committed Feb 13, 2013
1 parent 7ad13f5 commit 7d64a90
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 70 deletions.
5 changes: 2 additions & 3 deletions lexer.lisp
Expand Up @@ -290,8 +290,7 @@ handled elsewhere."
(return))
(write-char char out))))))
(list (if (char= first-char #\p) :property :inverted-property)
;; we must reverse here because of what PARSE-STRING does
(nreverse name))))
name)))

(defun collect-char-class (lexer)
"Reads and consumes characters from regex string until a right
Expand Down Expand Up @@ -571,7 +570,7 @@ closing #\> will also be consumed."
;; back-referencing a named register
(incf (lexer-pos lexer))
(list :back-reference
(nreverse (parse-register-name-aux lexer))))
(parse-register-name-aux lexer)))
(t
;; false alarm, just unescape \k
#\k)))
Expand Down
105 changes: 38 additions & 67 deletions parser.lisp
Expand Up @@ -128,10 +128,7 @@ Will return <parse-tree> or \(<grouping-type> <parse-tree>) where
;; have been the "(?:"<regex>")" production
(cons :group (nconc flags (list reg-expr)))
(if (eq open-token :open-paren-less-letter)
(list :named-register
;; every string was reversed, so we have to
;; reverse it back to get the name
(nreverse register-name)
(list :named-register register-name
reg-expr)
(list (case open-token
((:open-paren)
Expand Down Expand Up @@ -201,54 +198,42 @@ Will return <parse-tree> or (:SEQUENCE <parse-tree> <parse-tree>)."
;; to parse a <seq> or <quant> in order to catch empty regular
;; expressions
(if (start-of-subexpr-p lexer)
(let ((quant (quant lexer)))
(if (start-of-subexpr-p lexer)
(let* ((seq (seq lexer))
(quant-is-char-p (characterp quant))
(seq-is-sequence-p (and (consp seq)
(eq (first seq) :sequence))))
(cond ((and quant-is-char-p
(characterp seq))
(make-array-from-two-chars seq quant))
((and quant-is-char-p
(stringp seq))
(vector-push-extend quant seq)
seq)
((and quant-is-char-p
seq-is-sequence-p
(characterp (second seq)))
(cond ((cddr seq)
(setf (cdr seq)
(cons
(make-array-from-two-chars (second seq)
quant)
(cddr seq)))
seq)
(t (make-array-from-two-chars (second seq) quant))))
((and quant-is-char-p
seq-is-sequence-p
(stringp (second seq)))
(cond ((cddr seq)
(setf (cdr seq)
(cons
(progn
(vector-push-extend quant (second seq))
(second seq))
(cddr seq)))
seq)
(t
(vector-push-extend quant (second seq))
(second seq))))
(seq-is-sequence-p
;; if <seq> is also a :SEQUENCE parse tree we merge
;; both lists into one to avoid unnecessary consing
(setf (cdr seq)
(cons quant (cdr seq)))
seq)
(t (list :sequence quant seq))))
quant))
:void)))

(loop with seq-is-sequence-p = nil
with last-cdr
for quant = (quant lexer)
for quant-is-char-p = (characterp quant)
for seq = quant
then
(cond ((and quant-is-char-p (characterp seq))
(make-array-from-two-chars seq quant))
((and quant-is-char-p (stringp seq))
(vector-push-extend quant seq)
seq)
((not seq-is-sequence-p)
(setf last-cdr (list quant)
seq-is-sequence-p t)
(list* :sequence seq last-cdr))
((and quant-is-char-p
(characterp (car last-cdr)))
(setf (car last-cdr)
(make-array-from-two-chars (car last-cdr)
quant))
seq)
((and quant-is-char-p
(stringp (car last-cdr)))
(vector-push-extend quant (car last-cdr))
seq)
(t
;; if <seq> is also a :SEQUENCE parse tree we merge
;; both lists into one
(let ((cons (list quant)))
(psetf last-cdr cons
(cdr last-cdr) cons))
seq))
while (start-of-subexpr-p lexer)
finally (return seq))
:void)))

(defun reg-expr (lexer)
"Parses and consumes a <regex>, a complete regular expression.
The productions are: <regex> -> <seq> | <seq>\"|\"<regex>.
Expand Down Expand Up @@ -294,25 +279,11 @@ Will return <parse-tree> or (:ALTERNATION <parse-tree> <parse-tree>)."
(setf (lexer-pos lexer) pos)
seq)))))))

(defun reverse-strings (parse-tree)
"Recursively walks through PARSE-TREE and destructively reverses all
strings in it."
(declare #.*standard-optimize-settings*)
(cond ((stringp parse-tree)
(nreverse parse-tree))
((consp parse-tree)
(loop for parse-tree-rest on parse-tree
while parse-tree-rest
do (setf (car parse-tree-rest)
(reverse-strings (car parse-tree-rest))))
parse-tree)
(t parse-tree)))

(defun parse-string (string)
"Translate the regex string STRING into a parse tree."
(declare #.*standard-optimize-settings*)
(let* ((lexer (make-lexer string))
(parse-tree (reverse-strings (reg-expr lexer))))
(parse-tree (reg-expr lexer)))
;; check whether we've consumed the whole regex string
(if (end-of-string-p lexer)
parse-tree
Expand Down

0 comments on commit 7d64a90

Please sign in to comment.