Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
387 lines (327 sloc) 13.2 KB
(in-package :metapeg)
;(declaim (optimize (speed 3) (safety 0) (debug 0)))
(declaim (optimize (speed 0) (safety 3) (debug 3)))
; Global variables used during the parse
; --------------------
(defvar *actions*)
(defvar *rules* nil)
(defvar *input* nil)
(defvar *context* nil)
(defclass context ()
((parent :accessor parent :initform nil)
(rule :accessor rule :initform nil)
(children :accessor children :initform nil)
(value :accessor value :initform nil)
(start-index :initarg :start-index :accessor start-index :initform nil)
(end-index :accessor end-index :initform nil)))
(defmethod print-object ((obj context) stream)
(format stream "context ~A ~S val ~S ~A ~A" (rule obj) (children obj) (value obj) (start-index obj) (end-index obj)))
(defun clone-ctx (ctx rule)
(let ((new-ctx (make-instance 'context)))
(setf (parent new-ctx) ctx)
(setf (rule new-ctx) rule)
(setf (start-index new-ctx) (end-index ctx))
(defun ctx-failed-p (ctx) (null (end-index ctx)))
(defun succeed (ctx value start-index end-index)
(setf (value ctx) value)
(setf (start-index ctx) start-index)
(setf (end-index ctx) end-index)
; (format t "succeed ~A ~A ~A~%" (rule ctx) (start-index ctx) (end-index ctx))
(defun fail ()
(let ((ctx (make-instance 'context)))
(setf (rule ctx) 'fail)
(setf (value ctx) (rule *context*))
; (format t "fail ~A ~A ~A~%" (rule *context*) (start-index *context*) (end-index *context*))
; Utility functions
; --------------------
(defun make-name (string)
(intern (concatenate 'string "parse_" string)
(symbol-package 'this-package)))
(defun fix-escapes2 (char-list)
(do ((out nil)
(remaining char-list))
((null remaining) (reverse out))
(let ((c (first remaining)))
; (printf "rem ~s~n" remaining)
(if (char= c #\\)
(let ((nextc (second remaining)))
(setf out (cons (case nextc
((#\n) #\newline)
((#\t) #\tab)
(otherwise nextc))
(setf remaining (cdr (cdr remaining))))
(setf out (cons c out))
(setf remaining (cdr remaining)))))))
(defun fix-escapes (list) (fix-escapes2 list))
; filter out the first part of pair, useful for patterns where we specify a negative match (eg (!"x" .)*)
(defun zip-second (pair-list)
(loop for x in pair-list collect (second x)))
(defvar *build-with-tracing* nil)
(defmacro build-parser-function (name parser)
(if *build-with-tracing*
`(let* ((*context* (clone-ctx *context* ,name))
(result (funcall ,parser offset)))
(format t "~&~vT> ~A at ~D" offset ',name offset)
(if (ctx-failed-p result)
(succeed *context* (value result) (start-index result) (end-index result)))
(format t "~&~vT<~A ~A" offset ',name (if (ctx-failed-p result) ":<" ":)"))))
`(let* ((*context* (clone-ctx *context* ,name))
(result (funcall ,parser offset)))
(if (ctx-failed-p result)
(succeed *context* (value result) (start-index result) (end-index result))))))
(defun make-call-rule-closure (rule)
`#'(lambda (offset)
(let ((pair (assoc ',rule *rules*)))
(if pair
(funcall (cadr pair) offset)
(error "missing rule ~A" ',rule))) ))
(defun make-call-rule-closure2 (rule)
`#'(lambda (offset)
(let ((pair (assoc ',rule *rules*)))
(if pair
(funcall (cadr pair) offset)
(error "missing rule ~A" ',rule))) ))
(defun call-rule (rule)
(make-call-rule-closure rule))
(defun char-list-to-string (char-list)
(reduce #'(lambda (a b) (concatenate 'string a (string b))) char-list :initial-value ""))
(defun emit-actions (stream actions)
(loop for (sym string) in actions
do (format stream "~%(defun ~S (data) ~A )" sym string)))
; takes the name of the parser file to be created, the grammar and the existing bootstrap parser
(defun create-parser (new-parser-file-name grammar parser)
(multiple-value-bind (form actions) (parse grammar parser)
(with-open-file (stream new-parser-file-name :direction :output :if-exists :supersede)
(let ((*print-readably* t)
(*print-pretty* t)
(*print-circle* nil))
(loop for aform in form do
(prin1 aform stream)
do (format stream "~%"))
(format stream "~% ")
(emit-actions stream actions)))
; parsing combinator functions
; --------------------
; I have found remarkably elegant recursive versions of these combinators
; but this comment block is too small to note them
(defun either (&rest parsers)
#'(lambda (offset)
(block b1
(let ((*context* (clone-ctx *context* 'mp_either)))
(loop for p in parsers
do (let ((result (funcall p offset)))
(if (not (ctx-failed-p result))
(return-from b1 (succeed *context* (value result) offset (end-index result))))))))
(defun optional (parser) #'(lambda (offset)
(let ((*context* (clone-ctx *context* 'mp_optional)))
(let ((result (funcall parser offset)))
(if (ctx-failed-p result)
(succeed *context* 'optional offset offset)
(succeed *context* (value result) offset (end-index result)))))))
(defun follow (parser) #'(lambda (offset)
(let ((*context* (clone-ctx *context* 'mp_follow)))
(let ((result (funcall parser offset)))
(if (ctx-failed-p result)
(succeed *context* (value result) offset offset)))))) ;don't consume input
(defun many (parser) #'(lambda (offset)
(block b1
(let ((*context* (clone-ctx *context* 'mp_many))
(start-offset offset)
(loop do
(let ((result (funcall parser offset)))
(if (end-index result)
(push (value result) children)
(setf offset (end-index result)))
(return-from b1 (succeed *context* (reverse children) start-offset offset)))))))))
(defun many1 (parser) #'(lambda (offset)
(let ((*context* (clone-ctx *context* 'mp_many1)))
(let ((result (funcall parser offset)))
(if (end-index result)
(let ((result2 (funcall (many parser) (end-index result))))
(if (end-index result2)
(succeed *context* (cons (value result) (value result2)) offset (end-index result2))
(succeed *context* (value result) offset (end-index result))))
(defun seq (&rest parsers)
#'(lambda (offset)
(block b1
(assert (> (length parsers) 0))
(let ((*context* (clone-ctx *context* 'mp_seq))
(start-offset offset)
; run the parsers
(loop for p in parsers do
(if (not (listp p))
(let ((result (funcall p offset)))
(if (end-index result)
(push result child-nodes)
(push (value result) child-values)
(setf offset (end-index result))
(setf (children *context*) (reverse child-nodes)))
(return-from b1 (fail))))
(push (succeed (clone-ctx *context* 'action) nil offset offset) child-nodes)
(push p child-values)
(setf (children *context*) (reverse child-nodes))))
finally (return (succeed *context* (reverse child-values) start-offset offset)))))))
; non-portable use of bounding exception, should check input length instead
(defun match-string (string)
#'(lambda (offset)
(if (string= string (subseq *input* offset (+ offset ( length string))))
(let ((*context* (clone-ctx *context* 'mp_string)))
(succeed *context* string offset (+ offset (length string))))
(#+sbcl SB-KERNEL:BOUNDING-INDICES-BAD-ERROR #+ccl simple-error () (fail)))))
(defun match-char (char-list)
#'(lambda (offset)
(block b1
(loop for char in char-list do
(setf char (if (stringp char)
(elt char 0)
(if (char= char (elt *input* offset))
(return-from b1 (succeed (clone-ctx *context* 'mp_char) char offset (+ offset 1))))))
; (format t "match char dropped through ~S~%" char-list)
(#+sbcl SB-KERNEL:BOUNDING-INDICES-BAD-ERROR #+ccl simple-error ()
(#+sbcl SB-KERNEL::INDEX-TOO-LARGE-ERROR #+ccl simple-error ()
(defun match-any-char (ignored)
(declare (ignore ignored))
#'(lambda (offset)
(succeed (clone-ctx *context* 'mp_anychar) (elt *input* offset) offset (+ offset 1))
(#+sbcl SB-KERNEL:BOUNDING-INDICES-BAD-ERROR #+ccl simple-error ()
(defun match-any-char2 (ignored)
(declare (ignore ignored))
#'(lambda (offset)
(succeed (clone-ctx *context* 'mp_anychar) (elt *input* offset) offset (+ offset 1))
(#+sbcl SB-KERNEL:BOUNDING-INDICES-BAD-ERROR #+ccl simple-error ()
(defun negate (parser)
#'(lambda (offset)
(let ((*context* (clone-ctx *context* 'mp_negate)))
(let ((result (funcall parser offset)))
(if (ctx-failed-p result)
(succeed *context* 'negate offset offset) ;note we return a parse result but don't advance input
(defun find-match ( original-ctx examine-ctx rule-name offset)
(if (null examine-ctx)
(succeed (clone-ctx original-ctx rule-name) "" offset offset)
(let ((siblings (children examine-ctx)))
; (format t "siblings are ~S~%" siblings)
(loop for sibling in siblings
do (if (and (typep sibling 'context)
(stringp (rule sibling))
(string= (rule sibling) rule-name))
; (format t "comparing ~A ~A at ~A~%" rule-name sibling offset)
(let* ((ms (subseq *input* (start-index sibling) (end-index sibling)))
(failed (null (end-index (funcall (match-string ms) offset)))))
; (format t "match string ~S failed ~A~%" ms failed)
(return-from find-match (if failed
(succeed (clone-ctx original-ctx rule-name) ms offset (+ offset (length ms)))))))
(find-match original-ctx (parent examine-ctx) rule-name offset))))
(defun match (rule-name)
#'(lambda (offset)
(find-match *context* (parent *context*) rule-name offset)))
(defun read-file (filename)
(with-open-file (file filename :direction :input)
(let ((s (make-string (file-length file))))
;; (declare (dynamic-extent s)) ; stack alloc is unlikely
;; note characters are not bytes
(subseq s 0 (read-sequence s file)))))
(defvar *action-name-counter* 0)
(defun gen-action-name ()
(intern (format nil "METAPEG-ACTION~A" (incf *action-name-counter*))
(symbol-package 'this-package)))
(defun parse (input-file parser-file)
"Parse the input-file given the parser-file via parse-string."
(let ((input (read-file input-file)))
(parse-string input parser-file)))
(defvar *cached-parser-file-name* nil)
(defvar *cached-parser-file-write-date* nil)
(defun load-parser-if-necessary (parser-file)
"Load the parser defined in file unless it happens to be the one we last loaded."
((and (equal *cached-parser-file-name* parser-file) ; this breaks if the user changes directories
(equal *cached-parser-file-write-date* (file-write-date parser-file)))
(load-parser parser-file))))
(defun load-parser (&optional (parser-file *cached-parser-file-name*))
(setf *cached-parser-file-name* nil) ; fearing error invalidate the cache
(load parser-file)
(setf *cached-parser-file-name* parser-file)
(setf *cached-parser-file-write-date* (file-write-date parser-file)))
(defun parse-string (input parser-file)
"Wrapper for parse-string-using-latest-parser, but first load the parser in parser-file if necessary."
(load-parser-if-necessary parser-file)
(parse-string-using-latest-parser input))
(defun parse-string-using-latest-parser (input)
"Return two values; the result of the parse and what ever accumulates in *actions*"
(let ((*input* input)
(*actions* nil))
(let ((result (generated-parser)))
(if (not (ctx-failed-p result))
(if (= (length *input*) (end-index result))
(values (transform (value result)) *actions*)
(progn (cerror "Continue" "Parse only parsed up to index ~D, \"~A\"" (end-index result) (subseq *input* (end-index result)))
(values *actions*)))
(values result *actions*)))))
(defun transform (tree)
(if tree
(if (listp tree)
(if (equal (first tree) 'action)
(let ((data (mapcar #'transform tree)))
(progn (loop for el in data
when (and (listp el)
(equal (first el) 'action)
(symbolp (third el)))
do (progn
; (format *error-output* "data is ~s~%" data)
(return-from transform (funcall (third el) data))
(undefined-function (e)
(progn (format *error-output* "missing definition for ~S ~A~%" (third el) e)
;;; Example of how to rebootstrap the metapeg parser. Note that binding the metapeg::*action-name-counter*
;;; helps to keep the source control diffs under control.
(let ((*package* (find-package "METAPEG"))
(metapeg::*action-name-counter* 319))
(metapeg:create-parser "/tmp/metapeg.lisp" "metapeg.peg" "metapeg.lisp"))