Permalink
Browse files

bug20472. Improve mechanism for skipping headers/trailers

deflate-streams

  change definition of _compress-spec_ as follows:

    :gzip    - create a deflate stream w/ gzip headers (same)
    :zlib    - create a deflate stream w/ zlib headers (was :deflate)
    :deflate - create a deflate stream w/ NO headers.

  the meaning of :deflate as currently documented is confusing since
  to zlib users, it suggests no headers will be added but in truth
  a compressed stream with zlib headers is what you get.

inflate-streams

  add a :compression keyword argument that mirrors deflate-streams,
  with a couple of extra allowed values

  :gzip - (default). Automatically read past gzip header and trailer
  :zlib - read past zlib header/trailer
  :deflate - no headers in this stream.
  (header-fn trailer-fn) -
    Instead of one of the above methods, users may specify their own
    functions for skipping past headers and trailers wrapping the
    DEFLATE stream. This can be used to support less common encoding
    methods, such as zip or pkzip, etc. The inflate module does not
    capture any information from header and trailers, so the custom
    functions can also be used to capture this information if needed.
    The function accepts a single argument, which is the input-handle
    of the inflate-stream. It should return nil or the number of bytes
    read.
  nil - equivalent to the :deflate option. No callbacks are made.

(*) gzip is the default compression method to maintain compatibility with
    aserve compression.

The :skip-gzip-header keyword is deprecated, but no warning is issued
if used. This is to maintain compatibility with AllegroServe. If a
compression method is specified, the header and trailer are always
automatically skipped.

skip-gzip-trailer, skip-zlib-header, and skip-zlib-trailer are exported
from the :util.zip package.

Internally, a number of test routines have also been updated to
remove calls to skip-gzip-header that are no longer necessary.

Are there user visible changes in this commit?  yes

Are tests included for new features?  yes

Tests run:  make all

<release-notes>
bug20472. Improve way headers and trailers around deflate streams are handled.

See description above. It's the same.
</release-notes>

<documentation>
bug20472. (or from text above)
</documentation>

Change-Id: I0e90e1f2850b2a4da64e3d9cbf2cdac08847e956
  • Loading branch information...
1 parent e4d29e1 commit c43b5b2805bd3fe3010813e3354865e11187262b Mikel Bancroft committed Sep 19, 2011
Showing with 170 additions and 34 deletions.
  1. +8 −6 deflate.cl
  2. +117 −22 inflate.cl
  3. +45 −6 t-gzip.cl
View
@@ -37,8 +37,8 @@ v2: fix memory leak."
;;
;; The target argument is required. It says where to compressed
;; data.
-;; The :compression argument is optional. It can be :gzip or
-;; deflate. If not given :gzip is assumed
+;; The :compression argument is optional. It can be :gzip,
+;; :zlib, or :deflate. If not given :gzip is assumed
;;
;; If a stream is given as the :target then the compressed bytes
;; are written to that stream as they are generated.
@@ -265,8 +265,8 @@ actual error:~% ~a" c)))
(setf (zlib-target-vector p) output-target))
(t (error "the value of initarg :target must be a stream or simple (unsigned-byte 8) vector, not ~s" output-target)))
- (if* (not (member compression '(:gzip :deflate)))
- then (error "compression must be :gzip or :deflate, not ~s"
+ (if* (not (member compression '(:gzip :zlib :deflate)))
+ then (error "compression must be :gzip, :zlib, or :deflate, not ~s"
compression))
(if* (null output-target)
@@ -313,8 +313,10 @@ actual error:~% ~a" c)))
(defun make-z-stream (z-stream type)
(let (
- ;; windowBits default value is 15, if you add 16 you get bzip header and footer
- (window-bits (+ 15 (ecase type (:gzip 16) (:deflate 0)))))
+ ;; windowBits default value is 15 for zlib header and trailer
+ ;; if you add 16 you get gzip header and trailer
+ ;; if windowBits is -15, then you get a raw deflate stream.
+ (window-bits (+ 15 (ecase type (:gzip 16) (:zlib 0) (:deflate -30)))))
(setf (z-stream-slot zalloc z-stream) 0
(z-stream-slot zfree z-stream) 0
(z-stream-slot opaque z-stream) 0)
View
@@ -1,7 +1,8 @@
#+(version= 8 2)
-(sys:defpatch "inflate" 2
+(sys:defpatch "inflate" 3
"v1: improved inflate-stream;
-v2: performance improvements."
+v2: performance improvements.
+v3: Fix bug in v2 patch that always expected a gzip trailer."
:type :system
:post-loadable t)
@@ -124,7 +125,10 @@ that describe the custome huffman tree are themselves huffman coded.
(:use :common-lisp :excl)
(:export #:inflate
#:inflate-stream
- #:skip-gzip-header))
+ #:skip-gzip-header
+ #:skip-gzip-trailer
+ #:skip-zlib-header
+ #:skip-zlib-trailer))
(in-package :util.zip)
@@ -133,6 +137,9 @@ that describe the custome huffman tree are themselves huffman coded.
(require :iodefs))
(provide :inflate)
+;; used by aserve to create correct inflate-stream based
+;; on inflate patch installed when it was built.
+(pushnew :inflate-bug20472 *features*)
(defun inflate (p op)
;; user callable
@@ -243,6 +250,49 @@ that describe the custome huffman tree are themselves huffman coded.
; success!
bytes-read
))
+
+(defun skip-gzip-trailer (p)
+ ;; There is no identifier for the gzip trailer, so
+ ;; this function should only be called immediately after
+ ;; the final block is read in the DEFLATE data-stream
+ ;; of an inflate-stream with :compression :gzip
+ (dotimes (i 8) (read-byte p))
+ 8)
+
+(defun skip-zlib-header (p)
+ ;; typically a 2-byte header, unless an FDICT is present.
+ ;; first nibble should always be 8.
+ ;; second nibble should always be <= 7.
+ (let ((bytes-read 0))
+ (let* ((cmf (read-byte p))
+ (cm (logand cmf #xF))
+ (cinfo (ash cmf -4)))
+ (unless (and (= cm 8) (<= cinfo 7))
+ ;; not a zlib header
+ (unread-char (code-char cmf) p)
+ (return-from skip-zlib-header nil))
+ (incf bytes-read)
+
+ (let* ((flag (read-byte p))
+ (fdict (logand flag #x20)))
+ (unless (= (mod (+ (ash cmf 8) flag) 31) 0)
+ ;; not a zlib header
+ (error "non zlib header detected."))
+ (incf bytes-read)
+
+ ;; check for fdist
+ (if* (> fdict 0)
+ then ;; shouldn't occur, but just in case, skip DICTID
+ (dotimes (i 4) (read-byte p))
+ (incf bytes-read 4))))
+
+ ;; success!
+ bytes-read))
+
+(defun skip-zlib-trailer (p)
+ ;; 4-byte adler32 value.
+ (dotimes (i 4) (read-byte p))
+ 4)
;;;----------- end gzip support
@@ -845,13 +895,30 @@ that describe the custome huffman tree are themselves huffman coded.
;; external interface
;; open a atream p to a file containing the compressed data.
;;
-;; If this file may have a gzip header on it call (skip-gzip-header p)
-;; then (make-instance 'inflate-stream :input-handle p)
+;; The :compression argument is optional. It can be :gzip,
+;; :zlib, :deflate, or a list containing two function-specs.
+;; If not given :gzip is assumed. If nil is specified, it is
+;; equivalent to the :deflate method.
+;;
+;; if the :compression argument is one of the allowed symbols,
+;; the inflate-stream will automatically attempt to skip over
+;; the header and trailer associated with that type.
+;;
+;; if the :compression argument is a list, then the first function
+;; will be called with the inflate-streams input-handle, in order
+;; to read past the header. When the final deflate block was been read
+;; from the stream, it will then call the second function to read
+;; past the trailer.
+;;
+;; If this file may have a gzip header on it, then
+;; (make-instance 'inflate-stream :compression :gzip)
;; will return a stream which can be read to recover the uncompressed data
;;
-;; You can also just just call
-;; (make-instance 'inflate-stream :input-handle p :skip-gzip-header t)
-;; and the header will be skipped (if present)
+;; If you wanted to actually see the contents of the header and trailer
+;; you could install your own custom readers by
+;; (make-instance 'inflate-stream :input-handle p
+;; :compression '(my-header-reader my-trailer-reader))
+;; and these routines will be called with 'p' at the appropriate times.
;;
;; closing the inflate-stream will close stream p.
;;
@@ -930,14 +997,17 @@ into the inflate buffer.
:accessor inflate-passed-to-user)
(at-eof :initform nil :accessor inflate-stream-eof) ; true when read no more
+
+ (compression :initarg :compression :initform :gzip :accessor inflate-compression-type)
))
(defmethod device-open ((p inflate-stream) dummy options)
(declare (ignore dummy))
- (let ((input-handle (getf options :input-handle)))
+ (let ((input-handle (getf options :input-handle))
+ (compression (or (getf options :compression) :gzip)))
(if* (null input-handle)
then (error ":input-handle value must be specified on stream creation"))
@@ -946,6 +1016,18 @@ into the inflate buffer.
(install-single-channel-character-strategy
p (stream-external-format input-handle) nil)
+
+ ;; remain silent on the deprecation notice to maintain backward
+ ;; compatibility w/ older versions of the patch in how they
+ ;; interact with aserve compression.
+ #+ignore
+ (if* (not (eq (getf options :skip-gzip-header :absent) :absent))
+ then (warn ":skip-gzip-header has been deprecated. Use :compression to effect handling of headers and trailers."))
+
+ (unless (or (member compression '(:gzip :zlib :deflate))
+ (and (consp compression) (= (length compression) 2))
+ (null compression))
+ (error "compression must be :gzip, :zlib, :deflate, nil, or a list containing two function-specs, not ~s" compression))
(setf (stream-external-format p) (stream-external-format input-handle))
@@ -966,11 +1048,19 @@ into the inflate buffer.
;; specific to the inflate stream
(let ((initial-count 0))
- (if* (getf options :skip-gzip-header)
- then (setq initial-count
- (skip-gzip-header input-handle))
- )
-
+ (setq initial-count
+ (case compression
+ (:gzip (skip-gzip-header input-handle))
+ (:zlib (skip-zlib-header input-handle))
+ (:deflate 0)
+ (t (and (car compression) (funcall (car compression) input-handle)))))
+
+ (if* (null initial-count)
+ then ;; problem reading header. set compression mode to :deflate
+ ;; so we don't try to read a trailer, either.
+ (setq initial-count 0)
+ (setf (inflate-compression-type p) :deflate))
+
(setf (inflate-stream-br p) (new-bit-reader input-handle
:bytes-to-read
(getf options :content-length)
@@ -1104,16 +1194,24 @@ into the inflate buffer.
(defmethod excl::record-stream-advance-to-eof ((any t))
any)
(defmethod excl::record-stream-advance-to-eof ((p inflate-stream))
- (let ((inner-handle (slot-value p 'excl::input-handle)))
- ;; skip the gzip trailer
- (loop repeat 8 do (read-byte inner-handle))
+ (let ((inner-handle (slot-value p 'excl::input-handle))
+ (compression (inflate-compression-type p))
+ trailer-bytes)
+ ;; skip the trailer, if there is one
+ (setq trailer-bytes
+ (case compression
+ (:gzip (skip-gzip-trailer inner-handle))
+ (:zlib (skip-zlib-trailer inner-handle))
+ (:deflate 0)
+ (t (and (second compression) (funcall (second compression) inner-handle)))))
+ (when trailer-bytes
+ (incf (bit-reader-bytes-read (inflate-stream-br p)) trailer-bytes))
(excl::record-stream-advance-to-eof inner-handle))))
#+ignore
(defun teststr ()
;; setup test stream on a file
(with-open-file (p "foo.n.gz")
- (skip-gzip-header p)
(let ((*dec* (make-instance 'inflate-stream :input-handle p)))
(declare (special *dec*))
(break "foo"))))
@@ -1131,7 +1229,6 @@ into the inflate buffer.
(run-shell-command
(format nil "gzip -c ~a > foo.n.gz" filename))
(with-open-file (p "foo.n.gz")
- (skip-gzip-header p)
(let ((comp (make-instance 'inflate-stream :input-handle p)))
(with-open-file (of filename)
(loop
@@ -1162,9 +1259,7 @@ into the inflate buffer.
(run-shell-command
(format nil "gzip -c ~a > foo.n.gz" filename))
(with-open-file (p "foo.n.gz")
- (let ((comp (make-instance 'inflate-stream :input-handle p
- :skip-gzip-header t
- )))
+ (let ((comp (make-instance 'inflate-stream :input-handle p)))
(with-open-file (of filename)
(loop
(let ((compbyte (read-byte comp nil nil))
View
@@ -16,19 +16,20 @@
(eval-when (compile load eval)
(require :deflate)
+ (require :inflate)
(require :test))
(in-package :test)
-(defun deflate-file (input-filename output-filename)
+(defun deflate-file (input-filename output-filename &optional (type :gzip))
(with-open-file (in input-filename :direction :input)
(with-open-file (out output-filename
:direction :output
:if-exists :supersede)
(let ((buffer (make-array 4096 :element-type '(unsigned-byte 8)))
(deflate (make-instance 'util.zip::deflate-stream
:target out
- :compression :gzip)))
+ :compression type)))
(loop
(let ((bytes (read-vector buffer in)))
@@ -44,14 +45,15 @@
; finish compression
(close deflate)))))
-(defun inflate-file (input-filename output-filename)
+(defun inflate-file (input-filename output-filename &optional (type :gzip))
(with-open-file (in input-filename :direction :input)
(with-open-file (out output-filename
:direction :output
:if-exists :supersede)
+ (format t ";; Inside inflate-file~%")
(let ((inflate (make-instance 'util.zip::inflate-stream
- :input-handle in
- :skip-gzip-header t))
+ :compression type
+ :input-handle in))
byte)
(while (setq byte (read-byte inflate nil nil))
(write-byte byte out))))))
@@ -108,6 +110,38 @@
(when temp-file1 (ignore-errors (delete-file temp-file1)))
(when temp-file2 (ignore-errors (delete-file temp-file2)))))))
+
+(defun full-test (input-filename type &optional inflate-type)
+ ;; compress input-file to temp-file1, uncompress it back to temp-file2
+ ;; and compare temp-file2 to input-filename, error if not same.
+ (unless inflate-type
+ (setq inflate-type type))
+ (let (temp-file1 temp-file2)
+ (unwind-protect
+ (progn
+ (setq temp-file1 (sys:make-temp-file-name "full1"))
+ (setq temp-file2 (sys:make-temp-file-name "full2"))
+ (format t "; full test on ~a type ~s ~s~%"
+ (enough-namestring input-filename) type inflate-type)
+ (format t " ; deflate ~a to ~a~%" temp-file1 temp-file2)
+ (deflate-file input-filename temp-file1 type)
+ (format t " ; inflate ~a to ~a~%" temp-file1 temp-file2)
+ (inflate-file temp-file1 temp-file2 inflate-type)
+ ;;(format t "; compare ~a to ~a~%" input-filename temp-file2)
+ (test-t (excl::compare-files input-filename temp-file2)))
+ (when temp-file1 (ignore-errors (delete-file temp-file1)))
+ (when temp-file2 (ignore-errors (delete-file temp-file2))))))
+
+;; skip the 2-byte zlib header
+(defun custom-zlib-head (p)
+ (read-byte p) (read-byte p)
+ 2)
+
+;; skip the 4-byte zlib trailer
+(defun custom-zlib-tail (p)
+ (dotimes (i 4) (read-byte p))
+ 4)
+
(defun test-gzip ()
(map-over-directory
(lambda (p)
@@ -116,7 +150,12 @@
;; and the tests will fail.
(when (not (string-equal "out" (pathname-type p)))
(deflate-test p)
- (inflate-test p)))
+ (inflate-test p)
+ (dolist (type '(:gzip :zlib :deflate nil))
+ (full-test p type))
+ ;; test custom compression type.
+ (full-test p :zlib '(custom-zlib-head custom-zlib-tail))
+ ))
"./"
:recurse nil))

0 comments on commit c43b5b2

Please sign in to comment.