Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

add deflate-stream and improve inflate-stream

The deflate stream is in interface to the libz compression library
found on most unix systems and available on Windows system.
In this commit we've only tested on Linux

Are there user visible changes in this commit?  yes

Is bootstrapping needed?   no

Are tests included for new features? no, but aserve's tests test this

Tests run:  none

<release-note>
new deflate-stream and improved inflate-stream

The deflate-stream is documented in deflate.txt in this module.

The inflate stream change is that there is a new argument
to make-instance of an inflate-stream: :skip-gzip-header
with description:
Streams to be inflated often begin with a gzip header which
must be skipped over before inflation can begin.  Now you can
pass the argument :skip-gzip-header t to the call to
make-instance of an inflate-stream.  If the stream begins with
a gzip header then that header will be skipped.  If the stream
doesn't begin with a gzip header then this argument has no effect

</release-note>

<documentation>
see deflate.txt for documentation on the deflate-stream
</documentation>

Change-Id: I37fdea747a8e31a414fc7c986a38190de562181a
  • Loading branch information...
commit 08627ae04840a59f44c157289b6df576f9eab1c9 1 parent ebb1c50
John Foderaro authored
Showing with 822 additions and 25 deletions.
  1. +543 −0 deflate.cl
  2. +139 −0 deflate.txt
  3. +140 −25 inflate.cl
View
543 deflate.cl
@@ -0,0 +1,543 @@
+;; stream for doing compression
+;;
+;; code based on zlib.cl from AllegroGraph written by marijnh
+;;
+(defpackage :util.zip
+ (:use :common-lisp :excl)
+ (:export #:deflate-stream
+ #:deflate-target-stream
+ #:deflate-stream-vector
+ #:deflate-stream-vector-combined))
+
+(in-package :util.zip)
+
+
+(provide :deflate)
+
+(eval-when (compile load eval)
+(defconstant *zlib-in-buffer-size* (* 16 1024))
+(defconstant *zlib-out-buffer-size* (* 17 1024))
+)
+
+
+;; constants needed for resourcing for static buffers
+
+(defconstant *deflate-buffer-z-stream* 0)
+(defconstant *deflate-buffer-in* 1)
+(defconstant *deflate-buffer-out* 2)
+
+
+
+
+
+;; deflate-stream
+;;
+;; create via
+;; (make-instance 'deflate-stream :target stream-or-vector
+;; :compression ckind)
+;;
+;; The target argument is required. It says where to compressed
+;; data.
+;; The :compression argument is optional. It can be :gzip or
+;; deflate. If not given :gzip is assumed
+;;
+;; If a stream is given as the :target then the compressed bytes
+;; are written to that stream as they are generated.
+;; You cannot count on the zlib module to generate
+;; compressed bytes immediately. The only time you can be sure
+;; that all the compressed bytes have been send to the stream
+;; is after you close the deflate-stream. After the deflate-stream
+;; is closed, the last bits of compressed data is written to
+;; the target stream and a force-output is done the target
+;; stream. The target stream is NOT closed.
+;;
+;; If the :target value is a simple vector of (unsigned-byte 8) then
+;; the compressed bytes are written to that vector. If that
+;; vector fills up then more vectors are allocated.
+;; After the deflate-stream is closed you can call
+;; deflate-stream-vector to retrieve all off the vectors that contain
+;; the compressed data. You can also call deflate-stream-vector-combined
+;; to create a single vector containing all of the compressed data.
+;;
+;;
+;; examples
+;; (setq str (make-instance 'deflate-stream :target (make-array 1000 :element-type '(unsigned-byte 8))))
+;; (dotimes (i 1000) (write-byte (mod i 30) str))
+;; (close str)
+;; (deflate-stream-vector-combined str)
+;;
+
+(defvar *libz-dll-loaded* nil)
+
+(if* (not *libz-dll-loaded*)
+ then (load (util.string:string+ "libz." sys::*dll-type*) :foreign t)
+ (setq *libz-dll-loaded* t))
+
+
+(ff:def-foreign-type z-stream
+ (:struct (next-in (* :void)) ; next input byte
+ (avail-in :unsigned-int) ; number of bytes available at next-in
+ (total-in :unsigned-long) ; total nb of input bytes read so far
+
+ (next-out (* :void)) ; next output byte should be put there
+ (avail-out :unsigned-int) ; remaining free space at next_out
+ (total-out :unsigned-long) ; total nb of bytes output so far
+
+ (msg (* :char)) ; last error message, NULL if no error
+ (state (* :void)) ; not visible by applications
+
+ (zalloc (* :void)) ; used to allocate the internal state
+ (zfree (* :void)) ; used to free the internal state
+ (opaque (* :void)) ; private data object passed to zalloc and zfree
+
+ (data-type :int) ; best guess about the data type: binary or text
+ (adler :unsigned-long) ; adler32 value of the uncompressed data
+ (reserved :unsigned-long))) ; reserved for future use
+
+
+(ff:def-foreign-type deflate-in-buffer
+ (:struct (buff (:array :unsigned-char #.*zlib-in-buffer-size*))))
+
+(ff:def-foreign-type deflate-out-buffer
+ (:struct (buff (:array :unsigned-char #.*zlib-out-buffer-size*))))
+
+
+
+(defmacro z-stream-slot (name obj)
+ `(ff:fslot-value-typed 'z-stream :c ,obj ',name))
+
+(ff:def-foreign-call (deflate-init-2 "deflateInit2_")
+ ((stream (* z-stream))
+ (level :int)
+ (method :int)
+ (window-bits :int)
+ (mem-level :int)
+ (strategy :int)
+ (version (* :char))
+ (stream-size :int))
+ :strings-convert t
+ :returning :int)
+
+(ff:def-foreign-call (deflate "deflate")
+ ((stream (* z-stream))
+ (flush :int))
+ :returning :int)
+
+(ff:def-foreign-call (deflate-end "deflateEnd")
+ ((stream (* z-stream)))
+ :returning :int)
+
+
+(def-stream-class deflate-stream (single-channel-simple-stream)
+ ((z-state
+ ;; malloc z-state foreign object
+ ;; holding the info zlib needs to use to run
+ :initform 0
+ :accessor z-state)
+
+ ; using existing slots
+ ; from stream
+ ; flags
+ ; output-handle - stream to vector
+ ; external-format
+ ;
+ ; from simple-stream
+ ; buffer malloc,ed, contains user written data
+ ; buffer-ptr next byte to write
+ ; charpos always nil since we don't track
+ ;
+ ;
+
+ ; new slots
+
+ (z-stream
+ ;; holds malloc'ed zlib struct that controls compression
+ :initform 0
+ :accessor zlib-z-stream)
+
+ (in-buffer
+ ;; malloced buffer to which data is copied before compression
+ ;; since the compressor requires a static buffer
+ :accessor zlib-in-buffer)
+
+
+ (z-buffer
+ ;; malloc buffer holding data after compression
+ ;; it's malloced so it stays still
+
+ :initform 0
+ :accessor zlib-z-buffer)
+
+
+ (in-buffer-ptr :initform 0
+ :accessor zlib-in-buffer-ptr)
+
+
+ ; points to the lispstatic-reclaimable resources for
+ ; this stream. Should the stream be dropped and never
+ ; closed this list will be gc'ed and that will the
+ ; allow the static data to be reclaimed.
+ (static-resources :initform nil
+ :accessor zlib-static-resources)
+
+ ; trace usage
+ (in-bytes :initform 0
+ :accessor zlib-in-bytes)
+
+ (out-bytes :initform 0
+ :accessor zlib-out-bytes)
+
+
+ ;; for stream target
+ (target-stream
+ :initform nil
+ :accessor deflate-target-stream)
+
+ ;; for vector target
+ (target-vector
+ :initform nil
+ :accessor zlib-target-vector)
+
+ (target-vector-pos
+ :initform 0
+ :accessor zlib-target-vector-pos)
+
+ (target-vector-old
+ ; list of full previous target vectors
+ :initform nil
+ :accessor zlib-target-vector-old)
+
+ ;; end vector target
+
+
+ )
+ )
+
+(defmethod print-object ((p deflate-stream) s)
+ (print-unreadable-object (p s :identity t :type t)
+ (format s "in ~d / out ~d" (zlib-in-bytes p) (zlib-out-bytes p))))
+
+(defmethod device-open ((p deflate-stream) dummy options)
+ (declare (ignore dummy))
+
+
+ (let ((output-target (getf options :target))
+ (compression (or (getf options :compression)
+ :gzip))
+ (static-resources (get-deflate-buffer-resources)))
+
+ (setf (zlib-static-resources p) static-resources)
+
+ (destructuring-bind (z-stream-vec in-buffer-vec out-buffer-vec)
+ static-resources
+
+
+ (typecase output-target
+ (stream
+ (setf (deflate-target-stream p) output-target))
+ ((simple-array (unsigned-byte 8) (*))
+ (setf (zlib-target-vector p) output-target))
+ (t (error "the value of initarg :target must be a stream or simple (unsigned-byte 8) vector, not ~s" output-target)))
+
+ (if* (not (member compression '(:gzip :deflate)))
+ then (error "compression must be :gzip or :deflate, not ~s"
+ compression))
+
+ (if* (null output-target)
+ then (error ":output-target must be given when creating a deflate-stream"))
+
+ ;; normal these would be written using the with-stream-class
+ ;; macro and sm, but we may want to open source this so best
+ ;; to write it in code that doesn't need a dcl to build
+ (setf
+ (slot-value p 'excl::buffer) (make-array 4096 :element-type '(unsigned-byte 8))
+ (zlib-in-buffer p) (ff:fslot-address-typed
+ 'deflate-in-buffer
+ :foreign-static-gc
+ in-buffer-vec)
+
+
+ (zlib-z-buffer p) (ff:fslot-address-typed
+ 'deflate-out-buffer
+ :foreign-static-gc
+ out-buffer-vec)
+
+ (slot-value p 'excl::buffer-ptr) 0
+
+ (zlib-z-stream p) (make-z-stream (ff:fslot-address-typed
+ 'z-stream
+ :foreign-static-gc
+ z-stream-vec)
+
+ compression)
+
+ (slot-value p 'excl::control-out) excl::*std-control-out-table*
+
+ )
+
+ ; does some kind of initialization I think
+ (setf (stream-external-format p)
+ (stream-external-format p))
+
+ (add-stream-instance-flags p :output :simple)
+
+ t)))
+
+
+
+(defun make-z-stream (z-stream type)
+ (let (
+ ;; windowBits default value is 15, if you add 16 you get bzip header and footer
+ (window-bits (+ 15 (ecase type (:gzip 16) (:deflate 0)))))
+ (setf (z-stream-slot zalloc z-stream) 0
+ (z-stream-slot zfree z-stream) 0
+ (z-stream-slot opaque z-stream) 0)
+ (let ((err (deflate-init-2 z-stream
+ -1 #|default level|#
+ 8 #|Z_DEFLATED|#
+ window-bits
+ 8 #|default level|#
+ 0 #|Z_DEFAULT_STRATEGY|#
+ "1.2.3.4" #|version|#
+ (ff:sizeof-fobject 'z-stream))))
+ (if* (< err 0 #|Z_OK|#)
+ then (error "deflateInit2_ returned ~a" err)))
+ z-stream))
+
+
+
+(defmethod device-write ((p deflate-stream) buffer start end blocking)
+ ;;
+ ;; buffer is an ausb8
+ ;;
+ ;; fill up the internal static buffer
+ ;; do the compressing should the buffer fill up
+ ;;
+ (declare (ignore blocking))
+
+ (let ((in-buffer (zlib-in-buffer p))
+ (in-buffer-ptr (zlib-in-buffer-ptr p))
+ (max *zlib-in-buffer-size*)
+ (buffer (or buffer (slot-value p 'excl::buffer)))
+ )
+
+
+ (do ((i start (1+ i)))
+ ((>= i end))
+
+
+ (setf (sys::memref-int in-buffer in-buffer-ptr 0 :unsigned-byte)
+ (aref buffer i))
+ (incf in-buffer-ptr)
+
+ (if* (>= in-buffer-ptr max)
+ then ; must flush the buffer
+ (setf (zlib-in-buffer-ptr p) in-buffer-ptr)
+ (flush-deflate-stream-input-buffer p)
+ (setq in-buffer-ptr (zlib-in-buffer-ptr p))))
+
+
+ (setf (zlib-in-buffer-ptr p) in-buffer-ptr)
+
+
+ end))
+
+
+(defmethod flush-deflate-stream-input-buffer ((p deflate-stream))
+ ;; compress the contents of the input buffer
+
+ (let ((z-stream (zlib-z-stream p)))
+
+ (setf (z-stream-slot avail-in z-stream) (zlib-in-buffer-ptr p)
+ (z-stream-slot next-in z-stream) (zlib-in-buffer p))
+
+ (incf (zlib-in-bytes p) (zlib-in-buffer-ptr p))
+
+ (setf (zlib-in-buffer-ptr p) 0)
+
+ (loop
+ (if* (zerop (z-stream-slot avail-in z-stream))
+ then ; no more to compress
+ (return))
+
+
+ (setf (z-stream-slot next-out z-stream) (zlib-z-buffer p)
+ (z-stream-slot avail-out z-stream) *zlib-out-buffer-size*)
+
+ (let ((error (deflate z-stream 0 ; Z_NO_FLUSH
+ )))
+
+ (if* (< error 0)
+ then (error "zlib's deflate returned error code ~s" error))
+
+
+ (process-compressed-result p)))))
+
+(defmethod finish-zlib-compression ((p deflate-stream))
+ ;; finish the compression of the contents of the input buffer
+
+
+ (flush-deflate-stream-input-buffer p)
+
+ (let ((z-stream (zlib-z-stream p)))
+
+
+
+ (loop
+ (setf (z-stream-slot next-out z-stream) (zlib-z-buffer p)
+ (z-stream-slot avail-out z-stream) *zlib-out-buffer-size*)
+
+ (let ((error (deflate z-stream 4 ; Z_FINISH
+ )))
+
+ (process-compressed-result p)
+ (if* (eq error 1) ; Z_STREAM_END
+ then (return))
+ ))))
+
+
+(defmethod process-compressed-result ((p deflate-stream))
+ ;; take the resulant compressed bytes and put
+ ;; them somewhere
+
+ (let ((static-vec (zlib-z-buffer p))
+ (bytes (- *zlib-out-buffer-size*
+ (z-stream-slot avail-out (zlib-z-stream p)))))
+
+
+ (incf (zlib-out-bytes p) bytes)
+ ; we'll just write byte all the values
+
+ (let ((target-stream (deflate-target-stream p)))
+ (if* target-stream
+ then
+ (dotimes (i bytes)
+ (write-byte (sys:memref-int static-vec i 0 :unsigned-byte)
+ target-stream))
+ else (let* ((vec (zlib-target-vector p))
+ (pos (zlib-target-vector-pos p))
+ (max (length vec))
+ (static-base 0))
+
+ (loop
+ (let ((docopy (min bytes (- max pos))))
+
+ (dotimes (i docopy)
+ (setf (aref vec (+ pos i))
+ (sys:memref-int static-vec i static-base :unsigned-byte)))
+ (if* (> bytes docopy)
+ then ; we overflowed, more to do
+ (push vec (zlib-target-vector-old p))
+ (setq vec (make-array (length vec)
+ :element-type
+ '(unsigned-byte 8)))
+ (setf (zlib-target-vector p) vec)
+
+ (setq pos 0)
+ (incf static-base docopy)
+ (decf bytes docopy)
+
+ else ; finished
+ (setf (zlib-target-vector-pos p) (+ pos docopy))
+ (return)))))))))
+
+
+
+
+
+(defmethod device-close ((p deflate-stream) abort)
+
+ (if* (not abort)
+ then ; flush all current data
+ (finish-zlib-compression p))
+
+ (free-deflate-buffer-resource (zlib-static-resources p))
+
+ (if* (deflate-target-stream p)
+ then (force-output (deflate-target-stream p)))
+ p
+ )
+
+(without-package-locks
+(defmethod excl::inner-stream ((p deflate-stream))
+ (deflate-target-stream p)))
+
+
+(defmethod deflate-stream-vector ((p deflate-stream))
+ (let ((vec (zlib-target-vector p)))
+ (if* vec
+ then (values vec
+ (zlib-target-vector-pos p)
+ (zlib-target-vector-old p))
+ else (error "deflate-stream ~s was not created with a vector target" p))))
+
+(defmethod deflate-stream-vector-combined ((p deflate-stream))
+ (multiple-value-bind (last pos old) (deflate-stream-vector p)
+ (if* old
+ then ; must combine
+ (let ((size pos))
+ (dolist (v old) (incf size (length v)))
+ (let ((ans (make-array size :element-type '(unsigned-byte 8)))
+ (start 0))
+ (dolist (v (reverse old))
+ (replace ans v :start1 start)
+ (incf start (length v)))
+ (replace ans last :start1 start :end2 pos)
+ (values ans size)))
+ else (values last pos))))
+
+
+
+
+;;;;;;;;; test code
+
+(defun deflate-file (input-filename output-filename)
+ (with-open-file (in input-filename :direction :input)
+ (with-open-file (out output-filename
+ :direction :output
+ :if-exists :supersede)
+ (let ((buffer (make-array 4096 :element-type '(unsigned-byte 8)))
+ (deflate (make-instance 'deflate-stream
+ :target out
+ :compression :gzip)))
+ (loop
+ (let ((bytes (read-vector buffer in)))
+
+ (if* (zerop bytes) then (return))
+
+ (let ((pos 0))
+
+ (loop
+ (setq pos (write-vector buffer deflate :start pos :end bytes))
+ (if* (>= pos bytes) then (return)))
+ )))
+
+ ; finish compression
+ (close deflate)))))
+
+
+
+;; we'll resource the buffers we need to speed up allocation
+
+(defvar *deflate-resource-lock* (mp:make-process-lock))
+
+(defvar *deflate-malloc-resources* nil)
+
+
+
+(defun get-deflate-buffer-resources ()
+ (mp:with-process-lock (*deflate-resource-lock*)
+ (let ((buffers (pop *deflate-malloc-resources*)))
+ (if* buffers
+ thenret
+ else (list (ff:allocate-fobject 'z-stream :foreign-static-gc)
+ (ff:allocate-fobject 'deflate-in-buffer :foreign-static-gc)
+ (ff:allocate-fobject 'deflate-out-buffer :foreign-static-gc))))))
+
+
+(defun free-deflate-buffer-resource (buffers)
+ (mp:with-process-lock (*deflate-resource-lock*)
+ (push buffers *deflate-malloc-resources*)))
+
+
+
View
139 deflate.txt
@@ -0,0 +1,139 @@
+
+deflate module documentation
+
+[this is temporary until the documentation is property formatted
+and merged with other documentation]
+
+
+The deflate module provides a stream interface to the widely
+available libz compression library. libz is not included and is
+assumee to be on your machine.
+
+
+
+
+
+Functions are in the util.zip package.
+The module exports
+
+class:
+ deflate-stream
+
+functions
+ deflate-target-stream
+ deflate-stream-vector
+ deflate-stream-vector-combined
+
+
+
+
+
+
+This module implements the deflate-stream. A deflate-stream
+accepts characters and byte and causes them to be compressed
+and sent to a target.
+
+The target can either another stream, or it can be a vector.
+In the case of a vector the deflate-stream will collect the
+complete deflation of what is written to it in a sequence of
+vectors, initially using the vector passed in as the target and
+then allocating new vectors as necessary.
+
+Usage:
+(make-instance 'deflate-stream :target target-spec :compression compress-spec)
+
+The target-spec is a stream or a simple vector element type (unsigned-byte 8).
+
+The compress-spec is either :gzip or :deflate (where :gzip is the default).
+
+:gzip is the preferred format as the result can be uncompressed with
+the inflate-stream (be sure to specify :skip-gzip-header t to the
+make-instance of inflate-stream). The :gzip format output can
+also be uncompressed with the gunzip program fount on Unix.
+
+
+
+
+
+Stream as a target-spec
+
+If you pass a stream as the target-spec then as you write characters
+and bytes to the deflate-stream, the bytes resulting from deflation
+will be written to the given stream. There is a lot of buffering
+going on in this stream and the compression library. Therefore you
+may not see the results in your target-spec stream immeidately.
+
+When you close the deflate-stream the last bytes in all the buffers
+will be sent through deflation and the end of deflation record will
+be written to the target-spec stream.
+
+The target-spec stream will NOT be closed. It is the callers responsibility
+to close the stream passed in as the target-spec
+
+
+The function
+(deflate-target-stream deflate-stream)
+
+will return that target-spec stream used by the deflate-stream
+
+
+
+;
+
+
+Octet vector as a target-spec
+
+Passing a simple vector of type (unsigned-byte 8) as the target-spec
+is telling the deflate-stream that you wish to collect the deflation
+result in vectors in the lisp heap.
+
+After you close the deflate-stream you can retrieve the result of
+deflation in one of two ways:
+
+
+
+(deflate-stream-vector-combined deflate-stream)
+
+returns two values
+ 1. octet vector
+ 2. number of bytes of actual data
+
+this says that the result of deflation is found in the first N bytes
+of the vector returned by the first value. The second value returned is N.
+
+
+
+
+
+(deflate-stream-vector deflate-stream)
+
+returns three values
+ 1. the newest vector
+ 2. the number of byte of actual data in the newest vector
+ 3. a list of previous vectors holding data in reverse order
+
+for example if the three returned values were
+
+ v
+ 100
+ (c b a)
+
+then the deflated result is found by combining in this order:
+ all of a
+ all of b
+ all of c
+ the first 100 bytes of v
+
+The deflate-stream-vector-combined function does the combination
+described above to produce its results. This results in
+a new vector allocation and then copying that wouldn't be necessary
+if you're prepared to work with the raw results from deflate-stream-vector
+
+
+
+
+
+
+
+
+
View
165 inflate.cl
@@ -180,51 +180,64 @@ that describe the custome huffman tree are themselves huffman coded.
;; see check_header in gzio.c in rpm zlib-1.1.3 (or variant)
;; for details on what's in the header.
- (let (method flags)
+ (let (method flags (bytes-read 0))
; look for magic number
(if* (not (eql #x1f (read-byte p)))
then ; not a gzip header, may be a deflate block
(unread-char (code-char #x1f) p)
(return-from skip-gzip-header nil))
-
+ (incf bytes-read)
; now check the second magic number
(if* (not (eql #x8b (read-byte p)))
then (error "non gzip magic number"))
-
+
+ (incf bytes-read)
+
(setq method (read-byte p)
flags (read-byte p))
+ (incf bytes-read 2)
+
(if* (or (not (eql method z_deflated))
(not (zerop (logand flags gz_reserved))))
then (error "bad method/flags in header"))
; discard time, xflags and os code */
(dotimes (i 6) (read-byte p))
-
+
+ (incf bytes-read 6)
+
; discard extra field if present
(if* (logtest flags gz_extra_field)
then (let ((length (+ (read-byte p)
(ash (read-byte p) 8))))
- (dotimes (i length) (read-byte p))))
+ (dotimes (i length) (read-byte p))
+ (incf bytes-read (+ length 2))))
(if* (logtest flags gz_orig_name)
then ; discard name of file, null terminated
(do ((val (read-byte p) (read-byte p)))
- ((zerop val))))
+ ((zerop val)
+ (incf bytes-read))
+ (incf bytes-read)))
(if* (logtest flags gz_comment)
then ; discard comment, null terminated
(do ((val (read-byte p) (read-byte p)))
- ((zerop val))))
+ ((zerop val)
+ (incf bytes-read))
+ (incf bytes-read)))
(if* (logtest flags gz_head_crc)
then ; discard header crc
- (dotimes (i 2) (read-byte p)))
+ (dotimes (i 2) (read-byte p))
+ (incf bytes-read 2))
+
; success!
- t
+ bytes-read
))
;;;----------- end gzip support
@@ -238,6 +251,8 @@ that describe the custome huffman tree are themselves huffman coded.
stream
last-byte ; last byte read, possibly two combined bytes too
bits ; bits left of last byte to use
+ (bytes-read 0)
+ bytes-to-read ; number of bytes to read before eof (nil if no limit)
)
(defparameter *maskarray*
@@ -251,9 +266,11 @@ that describe the custome huffman tree are themselves huffman coded.
#x1fff #x3fff #x7fff #xffff)))
;; bit reader
-(defun new-bit-reader (stream)
+(defun new-bit-reader (stream &key bytes-to-read (bytes-read 0))
; create and initialize bit reader
- (make-bit-reader :stream stream :last-byte 0 :bits 0))
+ (make-bit-reader :stream stream :last-byte 0 :bits 0
+ :bytes-to-read bytes-to-read
+ :bytes-read bytes-read))
(defun reset-bit-reader (br)
; clear out unused bit of the current byte
@@ -285,10 +302,20 @@ that describe the custome huffman tree are themselves huffman coded.
(return last-byte)
)
else ; need a new byte
- (let ((new-byte (read-byte (bit-reader-stream br))))
- (setq last-byte (+ last-byte
- (ash new-byte bits)))
- (incf bits 8))))))
+ (let ((bytes-left (bit-reader-bytes-to-read br)))
+
+ (if* (eq 0 bytes-left)
+ then (error "end of file on bit reader"))
+
+ (let ((new-byte (read-byte (bit-reader-stream br))))
+
+ (incf (bit-reader-bytes-read br))
+ (if* bytes-left
+ then (setf (bit-reader-bytes-to-read br) (1- bytes-left)))
+
+ (setq last-byte (+ last-byte
+ (ash new-byte bits)))
+ (incf bits 8)))))))
@@ -786,10 +813,15 @@ that describe the custome huffman tree are themselves huffman coded.
;; external interface
;; open a atream p to a file containing the compressed data.
+;;
;; If this file may have a gzip header on it call (skip-gzip-header p)
;; then (make-instance 'inflate-stream :input-handle p)
;; will return a stream which can be read to recover the uncompressed data
;;
+;; You can also just just call
+;; (make-instance 'inflate-stream :input-handle p :skip-gzip-header t)
+;; and the header will be skipped (if present)
+;;
;; closing the inflate-stream will not close stream p. that must
;; be done separately.
;;
@@ -851,11 +883,22 @@ into the inflate buffer.
(inflate-buffer-end
:initform 0
:accessor inflate-buffer-end)
-
+
(cached-buffs
:initform nil
:accessor cached-buffs)
+ ; counters
+ (inflated-bytes
+ ;; bytes returned by inflation code
+ :initform 0
+ :accessor inflate-inflated-bytes)
+
+ (passed-to-user
+ ;; bytes passed to owner of this inflate stream
+ :initform 0
+ :accessor inflate-passed-to-user)
+
(at-eof :initform nil :accessor inflate-stream-eof) ; true when read no more
))
@@ -874,6 +917,8 @@ into the inflate buffer.
(install-single-channel-character-strategy
p (stream-external-format input-handle) nil)
+ (setf (stream-external-format p) (stream-external-format input-handle))
+
(add-stream-instance-flags p :input :simple)
; empty 32k buffer:
@@ -890,7 +935,16 @@ into the inflate buffer.
(getf (slot-value input-handle 'excl::plist) 'excl::filename))
;; specific to the inflate stream
- (setf (inflate-stream-br p) (new-bit-reader input-handle))
+ (let ((initial-count 0))
+ (if* (getf options :skip-gzip-header)
+ then (setq initial-count
+ (skip-gzip-header input-handle))
+ )
+
+ (setf (inflate-stream-br p) (new-bit-reader input-handle
+ :bytes-to-read
+ (getf options :content-length)
+ :bytes-read initial-count)))
(setf (inflate-stream-buffer p)
(make-array (* 32 1024) :element-type '(unsigned-byte 8)))
@@ -900,7 +954,18 @@ into the inflate buffer.
;; [bug17925]: add print-object method
(defmethod print-object ((stream inflate-stream) s)
(print-unreadable-object (stream s :identity *print-escape* :type t)
- (format s "inflating ~s" (excl::stream-input-handle stream))))
+ (format s "inflating ~s" (excl::stream-input-handle stream))
+ (format s "ef ~s, in: ~s, inflated ~d, used: ~d of "
+ (excl::ef-name (stream-external-format stream))
+ (let ((br (inflate-stream-br stream)))
+ (if* br
+ then (bit-reader-bytes-read br)))
+ (inflate-inflated-bytes stream)
+ (inflate-passed-to-user stream)
+ (slot-value stream 'excl::input-handle)
+ )
+
+ ))
(defmethod device-read ((p inflate-stream) buffer start end blocking)
@@ -909,6 +974,9 @@ into the inflate buffer.
(if* (null buffer) then (setq buffer (slot-value p 'excl::buffer)))
(if* (null end) then (setq end (length buffer)))
+
+ ; perhaps wishful thinking, as we haven't copied them yet
+
(loop
; first grab from the cached buffers
@@ -927,6 +995,7 @@ into the inflate buffer.
:end2 fromend)
(let ((copied (min (- end start) (- fromend fromstart))))
(incf fromstart copied)
+ (incf (inflate-passed-to-user p) copied)
(if* (>= fromstart fromend)
then ; the buffer's all used up
(setf (cached-buffs p) (cdr cbs))
@@ -949,6 +1018,7 @@ into the inflate buffer.
:start2 i-start
:end2 i-end)
(let ((copied (min (- end start) (- i-end i-start))))
+ (incf (inflate-passed-to-user p) copied)
(setf (inflate-buffer-start p) (+ i-start copied))
(return-from device-read copied)))
@@ -957,14 +1027,18 @@ into the inflate buffer.
then ; nothing more to read
(return-from device-read -1))
- (let ((end (process-deflate-block
- (inflate-stream-br p)
- #'(lambda (buffer end)
- (append-cache-buffer p buffer end))
- inflate-buffer
- i-end)))
+ (let* ((np p) ; close over this version
+ (end (process-deflate-block
+ (inflate-stream-br p)
+ #'(lambda (buffer end)
+ (incf (inflate-inflated-bytes np) end)
+ (append-cache-buffer np buffer end))
+ inflate-buffer
+ i-end)))
(if* (null end)
then ; no more data
+ (excl::record-stream-advance-to-eof
+ (slot-value p 'excl::input-handle))
(setf (inflate-stream-eof p) t)
(setf (inflate-buffer-end p) 0)
else (setf (inflate-buffer-end p) end))
@@ -973,12 +1047,16 @@ into the inflate buffer.
))))
+(without-package-locks
+ (defmethod excl::inner-stream ((p inflate-stream))
+ (slot-value p 'excl::input-handle)))
(defun append-cache-buffer (p buffer end)
;; add data from this buffer to the saved buffer list
;; and set the start back to 0 since this is only called
;; at the end of the buffer or when we're writing out the last block
(let ((size (- end (inflate-buffer-start p))))
+ (incf (inflate-inflated-bytes p)) ; record that we've captured these
(if* (> size 0)
then (let ((newbuf (make-array size :element-type '(unsigned-byte 8))))
(replace newbuf buffer
@@ -993,7 +1071,12 @@ into the inflate buffer.
(setf (inflate-buffer-start p) 0)))
-
+(without-package-locks
+ (defmethod excl::record-stream-advance-to-eof ((any t))
+ ;; if the stream is composed of records ending in a pseudo eof
+ ;; then read up to an eof
+ ;; (e.g. an unchunking stream is such a stream)
+ any))
#+ignore
(defun teststr ()
@@ -1033,3 +1116,35 @@ into the inflate buffer.
(if* (or (null compbyte) (null normbyte))
then (return))))
(format t "~d bytes processed~%" count))))))
+
+#+ignore
+(defun test-str-file-2 (filename)
+ ;; this compresses the contents of the given file into
+ ;; a temporary filename and then compares the result
+ ;; of decoding that compressed file using our inflate stream
+ ;; against the actual contents of the file.
+ ;;
+ ;; this differs from test-str-file in that we do the gzip
+ ;; header skipping in the call to make-instance
+ ;;
+ (let ((count 0))
+ (run-shell-command
+ (format nil "gzip -c ~a > foo.n.gz" filename))
+ (with-open-file (p "foo.n.gz")
+ (let ((comp (make-instance 'inflate-stream :input-handle p
+ :skip-gzip-header t
+ )))
+ (with-open-file (of filename)
+ (loop
+ (let ((compbyte (read-byte comp nil nil))
+ (normbyte (read-byte of nil nil)))
+ (if* compbyte
+ then (incf count))
+
+ (if* (not (eq compbyte normbyte))
+ then (format t "byte: ~d: comp: ~s, norm: ~s~%"
+ count
+ compbyte normbyte))
+ (if* (or (null compbyte) (null normbyte))
+ then (return))))
+ (format t "~d bytes processed~%" count))))))
Please sign in to comment.
Something went wrong with that request. Please try again.