Skip to content

Commit

Permalink
ProcessRotateLeft added, ProcessXor updated (optimised)
Browse files Browse the repository at this point in the history
  • Loading branch information
arekbulski committed Apr 8, 2018
1 parent 1ecbc0f commit c68d658
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 7 deletions.
2 changes: 2 additions & 0 deletions construct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
'Prefixed',
'PrefixedArray',
'Probe',
'ProcessRotateLeft',
'ProcessXor',
'RangeError',
'RawCopy',
Expand All @@ -158,6 +159,7 @@
'RestreamData',
'Restreamed',
'RestreamedBytesIO',
'RotationError',
'Seek',
'Select',
'SelectError',
Expand Down
134 changes: 129 additions & 5 deletions construct/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class TerminatedError(ConstructError):
pass
class RawCopyError(ConstructError):
pass
class RotationError(ConstructError):
    """Raised by ProcessRotateLeft when the group size is less than 1 or the data length is not a multiple of the group size."""
    pass
class ChecksumError(ConstructError):
pass
class CancelParsing(ConstructError):
Expand Down Expand Up @@ -4925,8 +4927,8 @@ class ProcessXor(Subconstruct):
Parsing reads till EOF, xors data with the pad, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, xors data with the pad, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError.
:param subcon: Construct instance
:param padfunc: integer or bytes or context lambda, single or multiple bytes to xor data with
:param subcon: Construct instance
:raises StringError: pad is not integer or bytes
Expand All @@ -4949,11 +4951,15 @@ def _parse(self, stream, context, path):
pad = evaluate(self.padfunc, context)
if not isinstance(pad, (integertypes, bytestringtype)):
raise StringError("ProcessXor needs integer or bytes pad")
if isinstance(pad, bytestringtype) and len(pad) == 1:
pad = byte2int(pad)
data = stream_read_entire(stream)
if isinstance(pad, integertypes):
data = bytes(bytearray((b ^ pad) for b in iterateints(data)))
if not (pad == 0):
data = integers2bytes( (b ^ pad) for b in iterateints(data) )
if isinstance(pad, bytestringtype):
data = bytes(bytearray((b ^ p) for b,p in zip(iterateints(data), itertools.cycle(iterateints(pad)))))
if not (len(pad) <= 64 and pad == bytes(len(pad))):
data = integers2bytes( (b ^ p) for b,p in zip(iterateints(data), itertools.cycle(iterateints(pad))) )
if self.subcon is GreedyBytes:
return data
if type(self.subcon) is GreedyString:
Expand All @@ -4964,13 +4970,131 @@ def _build(self, obj, stream, context, path):
pad = evaluate(self.padfunc, context)
if not isinstance(pad, (integertypes, bytestringtype)):
raise StringError("ProcessXor needs integer or bytes pad")
if isinstance(pad, bytestringtype) and len(pad) == 1:
pad = byte2int(pad)
stream2 = io.BytesIO()
buildret = self.subcon._build(obj, stream2, context, path)
data = stream2.getvalue()
if isinstance(pad, integertypes):
data = bytes(bytearray((b ^ pad) for b in iterateints(data)))
if not (pad == 0):
data = integers2bytes( (b ^ pad) for b in iterateints(data) )
if isinstance(pad, bytestringtype):
data = bytes(bytearray((b ^ p) for b,p in zip(iterateints(data), itertools.cycle(iterateints(pad)))))
if not (len(pad) <= 64 and pad == bytes(len(pad))):
data = integers2bytes( (b ^ p) for b,p in zip(iterateints(data), itertools.cycle(iterateints(pad))) )
stream_write(stream, data)
return buildret

def _sizeof(self, context, path):
return self.subcon._sizeof(context, path)


class ProcessRotateLeft(Subconstruct):
    r"""
    Transforms bytes between the underlying stream and the subcon.

    Used internally by KaitaiStruct compiler, when translating `process: rol/ror` tags.

    Parsing reads till EOF, rotates (shifts) the data *left* by amount in bits, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, rotates *right* by negating amount, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError.

    :param amount: integer or context lambda, shift by this amount in bits, treated modulo (group x 8)
    :param group: integer or context lambda, shifting is applied to chunks of this size in bytes
    :param subcon: Construct instance

    :raises RotationError: group is less than 1
    :raises RotationError: data length is not a multiple of group size

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = ProcessRotateLeft(4, 1, Int16ub)
        >>> d.parse(b'\x0f\xf0')
        0xf00f
        >>> d = ProcessRotateLeft(4, 2, Int16ub)
        >>> d.parse(b'\x0f\xf0')
        0xff00
        >>> d.sizeof()
        2
    """

    # Lookup tables for rotating a single byte left by 1..7 bits, indexed by amount then by byte value.
    # formula taken from: http://stackoverflow.com/a/812039
    precomputed_single_rotations = {amount: [(i << amount) & 0xff | (i >> (8-amount)) for i in range(256)] for amount in range(1,8)}

    def __init__(self, amount, group, subcon):
        super(ProcessRotateLeft, self).__init__(subcon)
        self.amount = amount
        self.group = group

    def _normalized_rotation(self, context, reverse):
        """Evaluate amount and group against the context and return ``(amount, group)``.

        The returned amount is a left rotation in bits, reduced modulo ``group * 8``.
        When ``reverse`` is true the amount is negated first, which turns the left
        rotation into the equivalent right rotation (used when building).

        :raises RotationError: group is less than 1
        """
        amount = evaluate(self.amount, context)
        group = evaluate(self.group, context)
        if group < 1:
            raise RotationError("group size must be at least 1 to be valid")
        if reverse:
            amount = -amount
        return amount % (group * 8), group

    @staticmethod
    def _rotate_groups(data, amount, group):
        """Rotate every ``group``-byte chunk of ``data`` left by ``amount`` bits and return the new bytes.

        ``amount`` must already be reduced into ``range(group * 8)``.

        :raises RotationError: data length is not a multiple of group size
        """
        if len(data) % group != 0:
            raise RotationError("data length must be a multiple of group size")

        # nothing to rotate, hand the data back untouched
        if amount == 0:
            return data

        data_ints = bytes2integers(data)

        # single-byte groups: amount is within 1..7 here, so a table lookup per byte suffices
        if group == 1:
            translate = ProcessRotateLeft.precomputed_single_rotations[amount]
            return integers2bytes( translate[a] for a in data_ints )

        amount_bytes, amount_bits = divmod(amount, 8)
        group_starts = range(0, len(data), group)

        # whole-byte rotation: only the order of bytes inside each group changes
        if amount_bits == 0:
            indices = [(i + amount_bytes) % group for i in range(group)]
            return integers2bytes( data_ints[start+k] for start in group_starts for k in indices )

        # general case: each output byte joins the low bits of one source byte
        # with the high bits of the following source byte (wrapping inside the group)
        carry_bits = 8 - amount_bits
        indices_pairs = [ ((i+amount_bytes) % group, (i+1+amount_bytes) % group) for i in range(group)]
        return integers2bytes( (data_ints[start+k1] << amount_bits) & 0xff | (data_ints[start+k2] >> carry_bits) for start in group_starts for k1,k2 in indices_pairs )

    def _parse(self, stream, context, path):
        # group is validated before the stream is consumed
        amount, group = self._normalized_rotation(context, reverse=False)
        data = self._rotate_groups(stream_read_entire(stream), amount, group)

        if self.subcon is GreedyBytes:
            return data
        if type(self.subcon) is GreedyString:
            return data.decode(self.subcon.encoding)
        return self.subcon._parsereport(io.BytesIO(data), context, path)

    def _build(self, obj, stream, context, path):
        # group is validated before the subcon is built
        amount, group = self._normalized_rotation(context, reverse=True)
        stream2 = io.BytesIO()
        buildret = self.subcon._build(obj, stream2, context, path)
        data = self._rotate_groups(stream2.getvalue(), amount, group)

        stream_write(stream, data)
        return buildret

Expand Down
1 change: 1 addition & 0 deletions docs/api/tunneling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Core API: Tunneling
.. autofunction:: construct.Transformed
.. autofunction:: construct.Restreamed
.. autofunction:: construct.ProcessXor
.. autofunction:: construct.ProcessRotateLeft
.. autofunction:: construct.Checksum
.. autofunction:: construct.Compressed
.. autofunction:: construct.Rebuffered
17 changes: 15 additions & 2 deletions docs/tunneling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,32 @@ Transformed can also process unknown amount of bytes, if that amount is entire d
Restreamed is similar to Transformed, but the main difference is that Transformed requires a fixed-size subcon because it reads all bytes in advance, processes them, and then feeds them to the subcon. Restreamed, on the other hand, reads a few bytes at a time, the minimum amount on each stream read. Since both are used mostly internally, there is no tutorial on how to use them, other than the short code above.


Processing data with XOR and algorithms
Processing data with XOR and ROL
----------------------------------------

This chapter is mostly relevant to the KaitaiStruct compiler implementation, as the following constructs exist mostly for that purpose.

Data can be transformed by XORing with a single integer or several bytes, and those can also be taken from the context at runtime. Note that ProcessXor reads entire stream till EOF so it should be wrapped in FixedSized NullTerminated unless you actually want to process entire remaining stream.
Data can be transformed by XORing with a single byte or several bytes, and the key can also be taken from the context at runtime. The key can be of any positive length.

>>> d = ProcessXor(0xf0 or b'\xf0', Int16ub)
>>> d.parse(b"\x00\xff")
0xf00f
>>> d.sizeof()
2

Data can also be rotated (cyclically shifted). Rotation is to the left on parsing, and to the right on building. The amount is in bits, and can be negative to rotate right instead of left. The group size defines the size in bytes of the chunks to which rotation is applied.

>>> d = ProcessRotateLeft(4, 1, Int16ub)
>>> d.parse(b'\x0f\xf0')
0xf00f
>>> d = ProcessRotateLeft(4, 2, Int16ub)
>>> d.parse(b'\x0f\xf0')
0xff00
>>> d.sizeof()
2

Note that both classes read the entire stream until EOF, so they should be wrapped in FixedSized, Prefixed, etc., unless you actually want to process the entire remaining stream.


Compression and checksumming
----------------------------------------
Expand Down
1 change: 1 addition & 0 deletions tests/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ def test_class_nullstripped_build(benchmark):
# - not compilable
# Restreamed
# ProcessXor
# ProcessRotateLeft
# Checksum
# - decompilable
# Compressed
Expand Down
1 change: 1 addition & 0 deletions tests/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@
# Transformed
# Restreamed
# ProcessXor
# ProcessRotateLeft
# Checksum
"compressed_bzip2_data" / Computed(b'BZh91AY&SYSc\x11\x99\x00\x00\x00A\x00@\x00@\x00 \x00!\x00\x82\x83\x17rE8P\x90Sc\x11\x99'),
"compressed_bzip2" / RestreamData(this.compressed_bzip2_data, Compressed(GreedyBytes, "bzip2", level=9)),
Expand Down
14 changes: 14 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,8 +1130,12 @@ def test_restreamed_partial_read():
assert raises(d.parse, b"") == StreamError

def test_processxor():
d = ProcessXor(0, Int16ub)
common(d, b"\xf0\x0f", 0xf00f, 2)
d = ProcessXor(0xf0, Int16ub)
common(d, b"\x00\xff", 0xf00f, 2)
d = ProcessXor(bytes(10), Int16ub)
common(d, b"\xf0\x0f", 0xf00f, 2)
d = ProcessXor(b"\xf0\xf0\xf0\xf0\xf0", Int16ub)
common(d, b"\x00\xff", 0xf00f, 2)

Expand All @@ -1144,6 +1148,16 @@ def test_processxor():
d = ProcessXor(b"XXXXX", GreedyString("utf-8"))
common(d, b"\x00", u"X", SizeofError)

def test_processrotateleft():
    """Exercise every rotation branch of ProcessRotateLeft plus its RotationError conditions."""
    # zero amount leaves the data untouched, whatever the group size
    d = ProcessRotateLeft(0, 1, GreedyBytes)
    common(d, bytes(10), bytes(10))
    d = ProcessRotateLeft(0, 2, GreedyBytes)
    common(d, bytes(10), bytes(10))

    # single-byte groups use the precomputed lookup tables
    d = ProcessRotateLeft(4, 1, GreedyBytes)
    common(d, b'\x0f\xf0', b'\xf0\x0f')
    # a negative amount rotates right; for 4 bits in a byte that equals rotating left by 4
    d = ProcessRotateLeft(-4, 1, GreedyBytes)
    common(d, b'\x0f\xf0', b'\xf0\x0f')

    # multi-byte groups, amount not a multiple of 8
    d = ProcessRotateLeft(4, 2, GreedyBytes)
    common(d, b'\x0f\xf0', b'\xff\x00')
    d = ProcessRotateLeft(12, 2, GreedyBytes)
    common(d, b'\x0f\xf0', b'\x00\xff')

    # multi-byte groups, whole-byte rotation only reorders bytes inside the group
    d = ProcessRotateLeft(8, 2, GreedyBytes)
    common(d, b'\x0f\xf0', b'\xf0\x0f')

    # data length must be a multiple of the group size
    d = ProcessRotateLeft(4, 2, GreedyBytes)
    assert raises(d.parse, b'\x00\x00\x00') == RotationError
    assert raises(d.build, b'\x00\x00\x00') == RotationError

    # group size must be at least 1
    d = ProcessRotateLeft(4, 0, GreedyBytes)
    assert raises(d.parse, b'') == RotationError
    assert raises(d.build, b'') == RotationError

def test_checksum():
d = Struct(
"fields" / RawCopy(Struct(
Expand Down

0 comments on commit c68d658

Please sign in to comment.