Skip to content

Commit

Permalink
CompressedLZ4 class and docs added
Browse files Browse the repository at this point in the history
  • Loading branch information
arekbulski committed Feb 11, 2021
1 parent af3b682 commit 92aa38c
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-benchmark enum34 numpy arrow ruamel.yaml cloudpickle
pip install pytest pytest-benchmark enum34 numpy arrow ruamel.yaml cloudpickle lz4
- name: Run tests
run: |
export PYTHONPATH=$PYTHONPATH:`pwd`
Expand Down
1 change: 1 addition & 0 deletions construct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
'ChecksumError',
'Compiled',
'Compressed',
'CompressedLZ4',
'Computed',
'Const',
'ConstError',
Expand Down
41 changes: 38 additions & 3 deletions construct/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5130,7 +5130,7 @@ def _sizeof(self, context, path):

class Compressed(Tunnel):
r"""
Compresses and decompresses underlying stream when processing subcon. When parsing, entire stream is consumed. When building, puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` .
Compresses and decompresses underlying stream when processing subcon. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` .
Parsing and building transforms all bytes using a specified codec. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined.
Expand All @@ -5146,11 +5146,12 @@ class Compressed(Tunnel):
>>> d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib"))
>>> d.build(bytes(100))
b'\x0cx\x9cc`\xa0=\x00\x00\x00d\x00\x01'
>>> len(_)
13
"""

def __init__(self, subcon, encoding, level=None):
super(Compressed, self).__init__(subcon)
super().__init__(subcon)
self.encoding = encoding
self.level = level
if self.encoding == "zlib":
Expand Down Expand Up @@ -5183,6 +5184,40 @@ def _encode(self, data, context, path):
return self.lib.encode(data, self.encoding)


class CompressedLZ4(Tunnel):
    r"""
    Tunnel that LZ4-compresses data on building and LZ4-decompresses it on parsing, around the subcon. When parsing, entire stream is consumed. When building, the compressed bytes are written without any end marker. This construct should be used with :class:`~construct.core.Prefixed` .

    All bytes are transformed using the lz4.frame module (``compress`` when building, ``decompress`` when parsing). Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined.

    :param subcon: Construct instance, subcon used for storing the value

    :raises ImportError: needed module could not be imported by ctor
    :raises StreamError: stream failed when reading until EOF

    Can propagate lz4.frame exceptions.

    Example::

        >>> d = Prefixed(VarInt, CompressedLZ4(GreedyBytes))
        >>> d.build(bytes(100))
        b'"\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x0b\x00\x00\x00\x1f\x00\x01\x00KP\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        >>> len(_)
        35
    """

    def __init__(self, subcon):
        super().__init__(subcon)
        # Imported here rather than at module level so that lz4 stays an
        # optional dependency; a missing package only fails this constructor.
        from lz4 import frame
        self.lib = frame

    def _decode(self, data, context, path):
        # Parsing direction: compressed bytes in, original bytes out.
        restored = self.lib.decompress(data)
        return restored

    def _encode(self, data, context, path):
        # Building direction: original bytes in, compressed bytes out.
        packed = self.lib.compress(data)
        return packed


class Rebuffered(Subconstruct):
r"""
Caches bytes from underlying stream, so it becomes seekable and tellable, and also becomes blocking on reading. Useful for processing non-file streams like pipes, sockets, etc.
Expand Down
1 change: 1 addition & 0 deletions docs/api/tunneling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ Core API: Tunneling
.. autofunction:: construct.ProcessRotateLeft
.. autofunction:: construct.Checksum
.. autofunction:: construct.Compressed
.. autofunction:: construct.CompressedLZ4
.. autofunction:: construct.Rebuffered
1 change: 1 addition & 0 deletions docs/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ Following modules are needed only if you want to use certain features:
* Different Python versions support different compression modules (like gzip, lzma), which are needed if you want to use the Compressed class.
* Ruamel.yaml is optional, if you want to use KaitaiStruct (KSY) exporter.
* Cloudpickle is optional, if you want to serialize the classes.
* LZ4 is optional, needed only if you want to use the CompressedLZ4 class.


Installing
Expand Down
2 changes: 2 additions & 0 deletions docs/transition210.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ cloudpickle is now supported and tested for
ZigZag signed integer encoding from Protocol Buffers added

FormatField now supports ? format string

CompressedLZ4 tunneling class added
12 changes: 11 additions & 1 deletion docs/tunneling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,18 @@ Data can be easily checksummed. Note that checksum field does not need to be Byt
d.build(dict(fields=dict(value={})))


Data can also be easily compressed. Supported encodings include zlib/gzip/bzip2/lzma and entire codecs module. When parsing, entire stream is consumed. When building, puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` or entire stream.
Data can also be easily compressed. Supported encodings include zlib/gzip/bzip2/lzma and entire codecs module. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` or entire stream.

>>> d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib"))
>>> d.build(bytes(100))
b'\x0cx\x9cc`\xa0=\x00\x00\x00d\x00\x01'
>>> len(_)
13

LZ4 compression is also supported. It achieves a lower compression ratio but runs at higher throughput. This class is also meant to be used with the Prefixed class.

>>> d = Prefixed(VarInt, CompressedLZ4(GreedyBytes))
>>> d.build(bytes(100))
b'"\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x0b\x00\x00\x00\x1f\x00\x01\x00KP\x00\x00\x00\x00\x00\x00\x00\x00\x00'
>>> len(_)
35
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"arrow",
"ruamel.yaml",
"cloudpickle",
"lz4",
],
},
keywords = [
Expand Down
7 changes: 7 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,6 +1285,13 @@ def test_compressed_prefixed():
assert st.parse(st.build(Container(one=zeros,two=zeros))) == Container(one=zeros,two=zeros)
assert raises(d.sizeof) == SizeofError

def test_compressedlz4():
    # Round-trip a highly compressible payload through the LZ4 tunnel.
    payload = bytes(10000)
    codec = CompressedLZ4(GreedyBytes)

    roundtrip = codec.parse(codec.build(payload))
    assert roundtrip == payload

    # 10000 zero bytes must shrink to under 100 bytes once compressed.
    packed_size = len(codec.build(payload))
    assert packed_size < 100

    # The compressed size is not known statically, so sizeof must fail.
    sizeof_outcome = raises(codec.sizeof)
    assert sizeof_outcome == SizeofError

def test_rebuffered():
data = b"0" * 1000
assert Rebuffered(Array(1000,Byte)).parse_stream(io.BytesIO(data)) == [48]*1000
Expand Down

0 comments on commit 92aa38c

Please sign in to comment.