Skip to content

Commit

Permalink
Optimizations to the parsing
Browse files Browse the repository at this point in the history
  - Upcase strings in read-time once, and the rest of the functions are case-sensitive
  - Optimized parse-simple and macrocompiler to fixed string parameters
  - Replace flexi-streams library with babel

It seems to speed up the parsing around 4 times, but real benchmark is missing.
  • Loading branch information
davazp committed Mar 18, 2012
1 parent 4c14cd0 commit 6f56d90
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 60 deletions.
2 changes: 1 addition & 1 deletion cl-icalendar.asd
Expand Up @@ -28,7 +28,7 @@
:name "iCalendar library"
:license "GPLv3+"
:version "0.0"
:depends-on (:trivial-gray-streams :cl-base64 :uuid :flexi-streams)
:depends-on (:trivial-gray-streams :cl-base64 :uuid :babel)
:serial t
:components
((:static-file "COPYING")
Expand Down
2 changes: 1 addition & 1 deletion cl-icalendar.lisp
Expand Up @@ -21,7 +21,7 @@
(in-package :cl-icalendar)

(defun open-vcalendar (pathname)
(with-open-file (infile pathname :element-type '(unsigned-byte 8))
(with-open-file (infile pathname :element-type :default)
(with-folding-stream (in infile)
(read-vcalendar in))))

Expand Down
7 changes: 3 additions & 4 deletions components.lisp
Expand Up @@ -58,8 +58,7 @@
(declare (ignorable initargs))))

(defun make-uninitialized-component (name)
(let* ((name (string-upcase name))
(component-class (find-component-class name)))
(let* ((component-class (find-component-class name)))
(allocate-component component-class name)))

(defun make-component (name &rest initargs)
Expand Down Expand Up @@ -290,8 +289,8 @@
;;; CLHS says: "If a defclass form appears as a top level form, the
;;; compiler must make the class name be recognized as a valid type
;;; name in..." So we have to define the classes separately.
(defmacro declare-component (name)
`(defclass ,name (standard-component) nil))
(defmacro declare-component (name &optional abstract-components)
`(defclass ,name (,@abstract-components standard-component) nil))

;;; Define a component.
(defmacro define-component (name options &body body)
Expand Down
8 changes: 4 additions & 4 deletions content-line.lisp
Expand Up @@ -23,9 +23,9 @@
(defun read-params-value (stream)
(if (char= (peek-char nil stream) #\")
(prog2 (read-char stream)
(parse stream "#\"" #\newline)
(parse stream "#\"" #.(string #\newline))
(read-char stream))
(string-upcase (parse stream ",;:" #\Newline))))
(string-upcase (parse stream ",;:" #.(string #\Newline)))))

(defun read-params-values (stream)
(unlist
Expand All @@ -38,7 +38,7 @@
(defun read-params (stream)
(with-collect
(while (char= (read-char stream) #\;)
(let ((name (parse stream "=" (coerce #(#\Newline #\: #\;) 'string))))
(let ((name (parse stream "=" #.(coerce #(#\Newline #\: #\;) 'string))))
(read-char stream)
(collect (string-upcase name))
(collect (read-params-values stream))))))
Expand All @@ -50,7 +50,7 @@
(char= ch #\space)
(char= ch #\tab))
do (read-char stream))
(values (parse stream ";:" #\Newline)
(values (string-upcase (parse stream ";:" #.(string #\Newline)))
(read-params stream)
(read-line stream)))

Expand Down
44 changes: 22 additions & 22 deletions folding.lisp
Expand Up @@ -20,16 +20,10 @@

(in-package :cl-icalendar)

;;; FIXME: flexi-streams requires a binary (or bivalent) stream. We
;;; would like to avoid this restriction.

(defconstant +tab-character+ (code-char #x09))

(defconstant +content-line-max-length+ 75)

;;; We build folding-stream upon flexi-streams. They are supposed to
;;; implement CRLF end of line style and UTF-8 encoding. Column octets
;;; counting is implemented by folding-stream.
(defclass folding-stream (fundamental-character-input-stream
fundamental-character-output-stream)
((column-octets
Expand All @@ -38,13 +32,12 @@
:reader folding-column-octets)
(backend-stream
:initform (required-arg)
:type flex:flexi-stream
:type stream
:initarg :stream
:reader folding-backend-stream)))

(defun make-folding-stream (stream)
(let ((fs (flex:make-flexi-stream stream :external-format '(:utf-8 :eol-style :crlf))))
(make-instance 'folding-stream :stream fs)))
(make-instance 'folding-stream :stream stream))

(defmacro with-folding-stream ((var stream) &body code)
`(with-open-stream (,var (make-folding-stream ,stream))
Expand All @@ -56,15 +49,19 @@

(defmethod stream-read-char ((stream folding-stream))
(with-slots (backend-stream) stream
(let ((character (stream-read-char backend-stream)))
(let ((character (read-char backend-stream)))
(cond
((eq character :eof) :eof)
;; #\return #\newline => #\newline
((char= character #\return)
(when (eql (peek-char nil backend-stream nil) #\newline)
(stream-read-char stream)))
((and (char= character #\newline)
(linear-whitespace-p
(peek-char nil backend-stream nil #\A)))
;; Skip the newline from folding algorithm and go on.
(stream-read-char backend-stream)
(stream-read-char backend-stream))
(read-char backend-stream)
(read-char backend-stream))
(t
character)))))

Expand All @@ -73,16 +70,19 @@

(defmethod stream-write-char ((stream folding-stream) character)
(with-slots (column-octets backend-stream) stream
(let* ((external-format (flex:flexi-stream-external-format backend-stream))
(size (flex:octet-length (string character) :external-format external-format)))
(when (> (+ column-octets size) +content-line-max-length+)
(stream-write-char backend-stream #\newline)
(stream-write-char backend-stream #\space)
(zerof column-octets))
(if (char= character #\newline)
(zerof column-octets)
(incf column-octets size))
(stream-write-char backend-stream character))))
(let* ((encoded (babel:string-to-octets
(if (char= character #\newline)
#.(coerce '(#\return #\linefeed) 'string)
(string character))))
(size (length encoded)))
(when (> (+ column-octets size) +content-line-max-length+)
(stream-write-char stream #\newline)
(stream-write-char stream #\space)
(zerof column-octets))
(if (char= character #\linefeed)
(zerof column-octets)
(incf column-octets size))
(write-sequence encoded backend-stream))))

(defmethod stream-line-column ((stream folding-stream))
(stream-line-column (folding-backend-stream stream)))
Expand Down
2 changes: 1 addition & 1 deletion parameters.lisp
Expand Up @@ -29,7 +29,7 @@
;;; Get the value of the parameter, whose name is given in the string
;;; designator PARAMETER, in the given PARAMETER-LIST.
(defun parameter (parameter parameter-list)
(let ((name (string-upcase (string parameter))))
(let ((name (string parameter)))
(loop for (param value) on parameter-list by #'cddr
when (string= name param) return value)))

Expand Down
21 changes: 10 additions & 11 deletions translate.lisp
Expand Up @@ -43,22 +43,21 @@
(defun register-translation (object icalname entity)
(with-slots (ical>lisp lisp>ical)
(intern-translation-table entity)
(let ((icalname (string-upcase icalname)))
;; Remove old associations for OBJECT and ICALNAME.
(let ((oldlisp (gethash icalname ical>lisp))
(oldical (gethash object lisp>ical)))
(remhash oldlisp lisp>ical)
(remhash oldical ical>lisp))
;; Set new associations
(setf (gethash icalname ical>lisp) object)
(setf (gethash object lisp>ical) icalname)
(values))))
;; Remove old associations for OBJECT and ICALNAME.
(let ((oldlisp (gethash icalname ical>lisp))
(oldical (gethash object lisp>ical)))
(remhash oldlisp lisp>ical)
(remhash oldical ical>lisp))
;; Set new associations
(setf (gethash icalname ical>lisp) object)
(setf (gethash object lisp>ical) icalname)
(values)))

;;; Translate the iCalendar name ICALNAME to a corresponding Lisp
;;; object in the ENTITY namespace.
(defun translate-to-lisp (icalname entity)
(with-slots (ical>lisp) (intern-translation-table entity)
(values (gethash (string-upcase icalname) ical>lisp))))
(values (gethash icalname ical>lisp))))

;;; Translate the Lisp value OBJECT to the corresponding iCalendar
;;; name in the ENTITY namespace.
Expand Down
6 changes: 4 additions & 2 deletions universal-time.lisp
Expand Up @@ -36,15 +36,17 @@

;;; Decode a universal time.
(defmacro decoded-universal-time
((&key second minute hour date month year day zone)
((&key second minute hour date month year day zone (timezone nil timezone-p))
form &body body)
(let ((vars))
(flet ((** (x) (or x (car (push (gensym) vars)))))
`(multiple-value-bind
(,(** second) ,(** minute) ,(** hour)
,(** date) ,(** month) ,(** year)
,(** day) ,(** zone))
(decode-universal-time ,form)
,(if timezone-p
`(decode-universal-time ,form ,timezone)
`(decode-universal-time ,form))
(declare (ignore ,@vars))
,@body))))

Expand Down
43 changes: 29 additions & 14 deletions utils.lisp
Expand Up @@ -318,27 +318,42 @@
;;; it finds a NON-EXPECT character, it signals an error. If an end of
;;; file condition is signaled and EOF-ERROR-P is nil, return nil.
(defun parse (stream char-bag &optional (not-expect "") (eof-error-p t))
(flet (;; Check if CH is a terminal char
(terminal-char-p (ch)
(declare (optimize speed))
(flet ((in (ch char-bag)
;; Check if CH is in CHAR-BAG.
(etypecase char-bag
(character (char= ch char-bag))
(sequence (find ch char-bag :test #'char=))
(function (funcall char-bag ch))))
;; Check if CH is not an expected char
(not-expect-char-p (ch)
(etypecase not-expect
(character (char= ch not-expect))
(sequence (find ch not-expect :test #'char=))
(function (funcall not-expect ch)))))
(sequence (find ch char-bag))
(function (funcall char-bag ch)))))
;; Read characters
(with-output-to-string (out)
(loop for ch = (peek-char nil stream eof-error-p)
until (and (not eof-error-p) (null ch))
until (terminal-char-p ch)
when (not-expect-char-p ch)
until (in ch char-bag)
when (in ch not-expect)
do (error "Character ~w is not expected." ch)
do (write-char (read-char stream) out)))))

;;; An optimized version of `parse' specific for calls where
;;; `char-bag' and `not-expected' are simple strings.
(defun parse-simple (stream char-bag &optional (not-expect "") (eof-error-p t))
(declare (optimize speed)
(simple-string char-bag not-expect)
(boolean eof-error-p))
;; Read characters
(with-output-to-string (out)
(loop for ch = (peek-char nil stream eof-error-p)
until (and (not eof-error-p) (null ch))
until (find (the character ch) char-bag)
when (find (the character ch) not-expect)
do (error "Character ~w is not expected." ch)
do (write-char (read-char stream) out))))

(define-compiler-macro parse (&whole form stream char-bag &optional (not-expect "") (eof-error-p t))
(if (and (stringp char-bag) (stringp not-expect))
`(parse-simple ,stream ,char-bag ,not-expect ,eof-error-p)
form))


;;;; Comparators

Expand All @@ -350,7 +365,6 @@

;;; Like `string=' but it is case-insensitive.
(defun string-ci= (str1 str2)
(declare (string str1 str2))
(and (= (length str1) (length str2))
(every #'char-ci= str1 str2)))

Expand Down Expand Up @@ -394,7 +408,8 @@

;;; Like `parse-integer' but it is not allowed to have a sign (+\-).
(defun parse-unsigned-integer (string &rest keyargs &key (start 0) end &allow-other-keys)
(unless (or (eql start end) (digit-char-p (elt string start)))
(declare (string string))
(unless (or (eql start end) (digit-char-p (char string start)))
(error "~w is not an unsigned integer." string))
(apply #'parse-integer string keyargs))

Expand Down

0 comments on commit 6f56d90

Please sign in to comment.