-
Notifications
You must be signed in to change notification settings - Fork 11
/
Lzma.hs
403 lines (334 loc) · 15.9 KB
/
Lzma.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
{-# LANGUAGE BangPatterns #-}
-- |
-- Module : Codec.Compression.Lzma
-- Copyright : © 2015 Herbert Valerio Riedel
-- License : BSD3
--
-- Maintainer : hvr@gnu.org
-- Stability : experimental
--
-- Compression and decompression of data streams in the lzma/xz format
--
-- See also the XZ Utils home page: <http://tukaani.org/xz/>
module Codec.Compression.Lzma
( -- * Simple (de)compression
compress
, decompress
-- * Extended API with control over parameters
, compressWith
, decompressWith
-- * Monadic incremental (de)compression API
--
-- | See <http://hackage.haskell.org/package/zlib-0.6.1.1/docs/Codec-Compression-Zlib-Internal.html#g:2 zlib's incremental API documentation> for more information.
-- ** Compression
, CompressStream(..)
, compressIO
, compressST
-- ** Decompression
, DecompressStream(..)
, decompressIO
, decompressST
, LzmaRet(..)
-- * Parameters
-- ** Compression parameters
, defaultCompressParams
, CompressParams
, compressIntegrityCheck
, compressLevel
, compressLevelExtreme
, compressThreads
, IntegrityCheck(..)
, CompressionLevel(..)
-- ** Decompression parameters
, defaultDecompressParams
, DecompressParams
, decompressTellNoCheck
, decompressTellUnsupportedCheck
, decompressTellAnyCheck
, decompressConcatenated
, decompressAutoDecoder
, decompressMemLimit
) where
import Control.Exception
import Control.Monad
import Control.Monad.ST (stToIO)
import Control.Monad.ST.Lazy (ST, runST, strictToLazyST)
import qualified Control.Monad.ST.Strict as ST.Strict (ST)
import Control.Monad.ST.Unsafe (unsafeIOToST)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Lazy.Internal as BSL
import GHC.IO (noDuplicate)
import LibLzma
-- | Decompress a lazy 'ByteString' holding @.xz@ data.
--
-- This is 'decompressWith' applied to 'defaultDecompressParams'.
decompress :: BSL.ByteString -> BSL.ByteString
decompress input = decompressWith defaultDecompressParams input
-- | Variant of 'decompress' that takes explicit decompression parameters.
-- Typical usage:
--
-- > decompressWith defaultDecompressParams { decompress... = ... }
--
-- The output is produced through the lazy 'ST' monad by driving the
-- stream returned by 'decompressST'. Trailing data after the end of the
-- stream and decoding errors are both reported by calling 'error'.
decompressWith :: DecompressParams -> BSL.ByteString -> BSL.ByteString
decompressWith parms input = runST (decompressST parms >>= drive input)
  where
    -- Feed the remaining input chunks to the decoder state and collect
    -- the output chunks. The pair is matched left to right, so the
    -- pending input is inspected before the decoder state.
    drive :: BSL.ByteString -> DecompressStream (ST s) -> ST s BSL.ByteString
    drive pending state = case (pending, state) of
      (BSL.Empty, DecompressStreamEnd leftover)
        | BS.null leftover -> return BSL.Empty
        | otherwise -> error "Codec.Compression.Lzma.decompressWith: trailing data"
      (BSL.Chunk _ _, DecompressStreamEnd _) ->
        error "Codec.Compression.Lzma.decompressWith: trailing data"
      (_, DecompressStreamError e) ->
        error ("Codec.Compression.Lzma.decompressWith: decoding error " ++ show e)
      -- Input exhausted: an empty chunk signals end of input.
      (BSL.Empty, DecompressInputRequired supply) ->
        supply BS.empty >>= drive BSL.Empty
      (BSL.Chunk c more, DecompressInputRequired supply) ->
        supply c >>= drive more
      (_, DecompressOutputAvailable oc next) ->
        fmap (BSL.chunk oc) (next >>= drive pending)
{-# NOINLINE decompressWith #-}
----------------------------------------------------------------------------
----------------------------------------------------------------------------
-- | Compress a lazy 'ByteString' into the @.xz@ format.
--
-- This is 'compressWith' applied to 'defaultCompressParams'.
compress :: BSL.ByteString -> BSL.ByteString
compress input = compressWith defaultCompressParams input
-- | Variant of 'compress' that takes explicit compression parameters.
-- Typical usage:
--
-- > compressWith defaultCompressParams { compress... = ... }
--
-- The output is produced through the lazy 'ST' monad by driving the
-- stream returned by 'compressST'.
compressWith :: CompressParams -> BSL.ByteString -> BSL.ByteString
compressWith parms input = runST (compressST parms >>= drive input)
  where
    -- Feed the remaining input chunks to the encoder state and collect
    -- the output chunks. The pair is matched left to right, so the
    -- pending input is inspected before the encoder state.
    drive :: BSL.ByteString -> CompressStream (ST s) -> ST s BSL.ByteString
    drive pending state = case (pending, state) of
      (BSL.Empty, CompressStreamEnd) ->
        return BSL.Empty
      -- The encoder only finishes after an empty chunk was supplied,
      -- which happens once the input is exhausted.
      (BSL.Chunk _ _, CompressStreamEnd) ->
        error "Codec.Compression.Lzma.compressWith: the impossible happened"
      -- Input exhausted: an empty chunk signals end of input.
      (BSL.Empty, CompressInputRequired _ supply) ->
        supply BS.empty >>= drive BSL.Empty
      (BSL.Chunk c more, CompressInputRequired _ supply) ->
        supply c >>= drive more
      (_, CompressOutputAvailable oc next) ->
        fmap (BSL.chunk oc) (next >>= drive pending)
{-# NOINLINE compressWith #-}
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
-- | State of an incremental compression run, parameterised over the
-- monad @m@ in which the continuations are executed.
--
-- The type is derived from @zlib@'s incremental API and augmented with
-- flushing support.
data CompressStream m
  = -- | Compression process requires input to proceed. You can
    -- either flush the stream (first field), supply an input chunk
    -- (second field), or signal the end of input (by supplying an
    -- empty chunk through the second field).
    CompressInputRequired
      (m (CompressStream m))               -- flush
      (ByteString -> m (CompressStream m)) -- supply
  | -- | Output chunk available; the second field continues the run.
    CompressOutputAvailable !ByteString (m (CompressStream m))
  | -- | Compression has finished.
    CompressStreamEnd
-- | Incremental compression in the 'IO' monad.
--
-- Creates an encoder with 'newEncodeLzmaStream' and returns the initial
-- 'CompressInputRequired' state. A failure to create the encoder and any
-- unexpected return code from 'runLzmaStream' are raised with 'throwIO'.
-- Violated internal invariants are reported via 'fail', with messages
-- prefixed by @compressIO:@.
compressIO :: CompressParams -> IO (CompressStream IO)
compressIO parms = (stToIO $ newEncodeLzmaStream parms) >>= either throwIO go
  where
    -- Passed as the last argument of every 'runLzmaStream' call
    -- (presumably the output buffer size in bytes; confirm in LibLzma).
    bUFSIZ = 32752

    go :: LzmaStream -> IO (CompressStream IO)
    go ls = return inputRequired
      where
        -- An empty chunk supplied by the caller signals end of input.
        inputRequired = CompressInputRequired goFlush (withChunk goFinish goInput)

        -- Feed one non-empty input chunk to the encoder.
        goInput :: ByteString -> IO (CompressStream IO)
        goInput chunk = do
          (rc, used, obuf) <- stToIO $ runLzmaStream ls chunk LzmaRun bUFSIZ

          -- Part of the chunk the encoder has not consumed yet.
          let chunk' = BS.drop used chunk

          case rc of
            LzmaRetOK
              | BS.null obuf -> do
                  -- Neither output produced nor input consumed: no
                  -- progress is possible, so bail out instead of looping.
                  unless (used > 0) $
                    fail "compressIO: input chunk not consumed"
                  withChunk (return inputRequired) goInput chunk'
              | otherwise ->
                  return (CompressOutputAvailable obuf
                            (withChunk (return inputRequired) goInput chunk'))
            _ -> throwIO rc

        goFlush, goFinish :: IO (CompressStream IO)
        goFlush = goSync LzmaSyncFlush (return inputRequired)
        goFinish = goSync LzmaFinish retStreamEnd

        -- drain encoder till LzmaRetStreamEnd is reported
        goSync :: LzmaAction -> IO (CompressStream IO) -> IO (CompressStream IO)
        goSync LzmaRun _ = fail "compressIO: goSync called with invalid argument"
        goSync action next = goSync'
          where
            goSync' = do
              (rc, n, obuf) <- stToIO $ runLzmaStream ls BS.empty action bUFSIZ
              -- No input was supplied, so none may be reported as
              -- consumed. Checked explicitly (as in 'compressST') so a
              -- violation yields a named error rather than an anonymous
              -- pattern-match failure.
              when (n /= 0) $ fail "compressIO: n was not zero"
              case rc of
                LzmaRetOK
                  | BS.null obuf -> fail ("compressIO: empty output chunk during " ++ show action)
                  | otherwise -> return (CompressOutputAvailable obuf goSync')
                LzmaRetStreamEnd
                  | BS.null obuf -> next
                  | otherwise -> return (CompressOutputAvailable obuf next)
                _ -> throwIO rc

        -- End the encoder stream; the bang pattern forces the result of
        -- 'endLzmaStream' before the final state is returned.
        retStreamEnd = do
          !() <- stToIO (endLzmaStream ls)
          return CompressStreamEnd
-- | Incremental compression in the lazy 'ST' monad.
--
-- Creates an encoder with 'newEncodeLzmaStream' and returns the initial
-- 'CompressInputRequired' state. A failure to create the encoder and any
-- unexpected return code from 'runLzmaStream' are raised with 'throw'.
-- Violated internal invariants are reported via 'error', with messages
-- prefixed by @compressST:@.
--
-- Every 'runLzmaStream' and 'endLzmaStream' call is preceded by
-- 'noDuplicateST' and run through 'strictToLazyST'.
compressST :: CompressParams -> ST s (CompressStream (ST s))
compressST parms = strictToLazyST (newEncodeLzmaStream parms) >>=
                   either throw go
  where
    -- Passed as the last argument of every 'runLzmaStream' call
    -- (presumably the output buffer size in bytes; confirm in LibLzma).
    bUFSIZ = 32752

    go ls = return inputRequired
      where
        -- An empty chunk supplied by the caller signals end of input.
        inputRequired = CompressInputRequired goFlush (withChunk goFinish goInput)

        -- Feed one non-empty input chunk to the encoder.
        goInput :: ByteString -> ST s (CompressStream (ST s))
        goInput chunk = do
          (rc, used, obuf) <- strictToLazyST (noDuplicateST >>
                                              runLzmaStream ls chunk LzmaRun bUFSIZ)

          -- Part of the chunk the encoder has not consumed yet.
          let chunk' = BS.drop used chunk

          case rc of
            LzmaRetOK
              | BS.null obuf -> do
                  -- Neither output produced nor input consumed: no
                  -- progress is possible, so bail out instead of looping.
                  unless (used > 0) $
                    error "compressST: input chunk not consumed"
                  withChunk (return inputRequired) goInput chunk'
              | otherwise ->
                  return (CompressOutputAvailable obuf
                            (withChunk (return inputRequired) goInput chunk'))
            _ -> throw rc

        goFlush, goFinish :: ST s (CompressStream (ST s))
        goFlush = goSync LzmaSyncFlush (return inputRequired)
        goFinish = goSync LzmaFinish retStreamEnd

        -- drain encoder till LzmaRetStreamEnd is reported
        goSync :: LzmaAction -> ST s (CompressStream (ST s)) -> ST s (CompressStream (ST s))
        goSync LzmaRun _ = error "compressST: goSync called with invalid argument"
        goSync action next = goSync'
          where
            goSync' = do
              (rc, n, obuf) <- strictToLazyST (noDuplicateST >>
                                               runLzmaStream ls BS.empty action bUFSIZ)
              -- No input was supplied, so none may be reported as consumed.
              when (n /= 0) $ error "compressST: n was not zero"
              case rc of
                LzmaRetOK
                  -- The message names this function; it previously said
                  -- "compressIO", copied from the IO variant.
                  | BS.null obuf -> error ("compressST: empty output chunk during " ++ show action)
                  | otherwise -> return (CompressOutputAvailable obuf goSync')
                LzmaRetStreamEnd
                  | BS.null obuf -> next
                  | otherwise -> return (CompressOutputAvailable obuf next)
                _ -> throw rc

        -- End the encoder stream; the bang pattern forces the result of
        -- 'endLzmaStream' before the final state is returned.
        retStreamEnd = do
          !() <- strictToLazyST (noDuplicateST >> endLzmaStream ls)
          return CompressStreamEnd
--------------------------------------------------------------------------------
-- | State of an incremental decompression run, parameterised over the
-- monad @m@ in which the continuations are executed.
data DecompressStream m
  = -- | Decoding process requires input to proceed. An empty
    -- 'ByteString' chunk signals end of input.
    DecompressInputRequired (ByteString -> m (DecompressStream m))
  | -- | Decompressed output chunk available; the second field continues
    -- the run.
    DecompressOutputAvailable !ByteString (m (DecompressStream m))
  | -- | Decoded stream is finished. Any unconsumed leftovers from the
    -- input stream are returned via the 'ByteString' field.
    DecompressStreamEnd ByteString
  | -- | Decoding could not be started or continued; the field carries
    -- the reported 'LzmaRet'. 'decompressIO' and 'decompressST' also
    -- report 'LzmaRetOK' here when end of input was signalled but the
    -- decoder produced no further output without reaching the end of
    -- the stream.
    DecompressStreamError !LzmaRet -- TODO define subset-enum of LzmaRet
-- | Incremental decompression in the 'IO' monad.
--
-- Creates a decoder with 'newDecodeLzmaStream' and returns the initial
-- 'DecompressInputRequired' state. Decoder failures (including a failure
-- to create the decoder) are not thrown but returned as
-- 'DecompressStreamError'. Only violated internal invariants are
-- reported via 'fail', with messages prefixed by @decompressIO:@.
decompressIO :: DecompressParams -> IO (DecompressStream IO)
decompressIO parms = stToIO (newDecodeLzmaStream parms) >>= either (return . DecompressStreamError) go
  where
    -- Passed as the last argument of every 'runLzmaStream' call
    -- (presumably the output buffer size in bytes; confirm in LibLzma).
    bUFSIZ = 32752

    go :: LzmaStream -> IO (DecompressStream IO)
    go ls = return inputRequired
      where
        inputRequired = DecompressInputRequired goInput

        -- Feed one input chunk to the decoder; an empty chunk signals
        -- end of input.
        goInput :: ByteString -> IO (DecompressStream IO)
        goInput chunk
          | BS.null chunk = goFinish
          | otherwise = do
              (rc, used, obuf) <- stToIO $ runLzmaStream ls chunk LzmaRun bUFSIZ

              -- Part of the chunk the decoder has not consumed yet.
              let chunk' = BS.drop used chunk

              case rc of
                LzmaRetOK
                  | BS.null obuf -> do
                      -- Neither output produced nor input consumed: no
                      -- progress is possible, so bail out instead of
                      -- looping.
                      unless (used > 0) $
                        fail "decompressIO: input chunk not consumed"
                      withChunk (return inputRequired) goInput chunk'
                  -- With the chunk fully consumed, drain pending output
                  -- before asking for more input.
                  | otherwise ->
                      return (DecompressOutputAvailable obuf
                                (withChunk goDrain goInput chunk'))
                LzmaRetStreamEnd
                  | BS.null obuf -> retStreamEnd chunk'
                  | otherwise ->
                      return (DecompressOutputAvailable obuf
                                (retStreamEnd chunk'))
                _ -> return (DecompressStreamError rc)

        goDrain, goFinish :: IO (DecompressStream IO)
        goDrain = goSync LzmaRun (return inputRequired)
        -- End of input was signalled: if the decoder reports 'LzmaRetOK'
        -- without output instead of 'LzmaRetStreamEnd', that is surfaced
        -- as an error state.
        goFinish = goSync LzmaFinish (return $ DecompressStreamError LzmaRetOK)

        -- Run the decoder without new input until it yields no more
        -- output (then continue with @next@) or reports the stream end.
        goSync :: LzmaAction -> IO (DecompressStream IO) -> IO (DecompressStream IO)
        goSync action next = goSync'
          where
            goSync' = do
              (rc, n, obuf) <- stToIO $ runLzmaStream ls BS.empty action bUFSIZ
              -- No input was supplied, so none may be reported as
              -- consumed. Checked explicitly (as in 'decompressST') so a
              -- violation yields a named error rather than an anonymous
              -- pattern-match failure.
              when (n /= 0) $ fail "decompressIO: n was not zero"
              case rc of
                LzmaRetOK
                  | BS.null obuf -> next
                  | otherwise -> return (DecompressOutputAvailable obuf goSync')
                LzmaRetStreamEnd
                  | BS.null obuf -> eof0
                  | otherwise -> return (DecompressOutputAvailable obuf eof0)
                _ -> return (DecompressStreamError rc)

            eof0 = retStreamEnd BS.empty

        -- End the decoder stream and report the unconsumed input; the
        -- bang pattern forces the result of 'endLzmaStream' first.
        retStreamEnd chunk' = do
          !() <- stToIO (endLzmaStream ls)
          return (DecompressStreamEnd chunk')
-- | Incremental decompression in the lazy 'ST' monad.
--
-- Creates a decoder with 'newDecodeLzmaStream' and returns the initial
-- 'DecompressInputRequired' state. Decoder failures (including a failure
-- to create the decoder) are returned as 'DecompressStreamError';
-- violated internal invariants are reported via 'error'.
decompressST :: DecompressParams -> ST s (DecompressStream (ST s))
decompressST parms = do
  created <- strictToLazyST (newDecodeLzmaStream parms)
  case created of
    Left err -> return (DecompressStreamError err)
    Right strm -> start strm
  where
    -- Passed as the last argument of every 'runLzmaStream' call.
    outBufSize = 32752

    start :: LzmaStream -> ST s (DecompressStream (ST s))
    start strm = return awaitInput
      where
        awaitInput = DecompressInputRequired feed

        -- One decoder step, preceded by 'noDuplicateST' and lifted into
        -- the lazy 'ST' monad.
        step inp action =
          strictToLazyST (noDuplicateST >> runLzmaStream strm inp action outBufSize)

        -- Hand over @out@ (if non-empty) and then continue with @k@.
        emitThen out k =
          withChunk k (\o -> return (DecompressOutputAvailable o k)) out

        -- An empty chunk signals end of input.
        feed :: ByteString -> ST s (DecompressStream (ST s))
        feed = withChunk finish consume

        -- Feed one non-empty input chunk to the decoder.
        consume inp = do
          (ret, consumed, out) <- step inp LzmaRun
          let leftover = BS.drop consumed inp
          case ret of
            LzmaRetOK
              | BS.null out -> do
                  unless (consumed > 0) $
                    error "decompressST: input chunk not consumed"
                  withChunk (return awaitInput) feed leftover
              | otherwise ->
                  return (DecompressOutputAvailable out (withChunk drain feed leftover))
            LzmaRetStreamEnd -> emitThen out (close leftover)
            _ -> return (DecompressStreamError ret)

        drain, finish :: ST s (DecompressStream (ST s))
        drain = pump LzmaRun (return awaitInput)
        finish = pump LzmaFinish (return (DecompressStreamError LzmaRetOK))

        -- Run the decoder without new input until it yields no more
        -- output (then continue with @whenIdle@) or reports the stream end.
        pump :: LzmaAction -> ST s (DecompressStream (ST s)) -> ST s (DecompressStream (ST s))
        pump action whenIdle = again
          where
            again = do
              (ret, consumed, out) <- step BS.empty action
              when (consumed /= 0) $ error "decompressST: n was not zero"
              case ret of
                LzmaRetOK ->
                  withChunk whenIdle (\o -> return (DecompressOutputAvailable o again)) out
                LzmaRetStreamEnd -> emitThen out (close BS.empty)
                _ -> return (DecompressStreamError ret)

        -- End the decoder stream and report the unconsumed input; the
        -- bang pattern forces the result of 'endLzmaStream' first.
        close leftover = do
          !() <- strictToLazyST (noDuplicateST >> endLzmaStream strm)
          return (DecompressStreamEnd leftover)
-- | 'maybe'-like eliminator for strict 'ByteString's: yields the first
-- argument for an empty chunk, otherwise applies the second argument to
-- the (non-empty) chunk.
withChunk :: t -> (ByteString -> t) -> ByteString -> t
withChunk onEmpty onNonEmpty bs =
  if BS.null bs then onEmpty else onNonEmpty bs
-- | 'noDuplicate' lifted into the strict 'ST.Strict.ST' monad via
-- 'unsafeIOToST'.
--
-- In this module it is sequenced in front of every 'runLzmaStream' and
-- 'endLzmaStream' call that is run through 'strictToLazyST'.
--
-- See <https://github.com/haskell/zlib/issues/7>
noDuplicateST :: ST.Strict.ST s ()
noDuplicateST = unsafeIOToST noDuplicate