Skip to content

Commit

Permalink
Merge pull request #11 from jd-boyd/feature/raw_functions
Browse files Browse the repository at this point in the history
Make binary header metadata optional
  • Loading branch information
jd-boyd committed Nov 20, 2015
2 parents 45b62dd + f8e3ade commit 7f76307
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 45 deletions.
132 changes: 87 additions & 45 deletions lzomodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
*/


#define MODULE_VERSION "1.08"
#define MODULE_VERSION "1.09"

#include <Python.h>
#include <lzo1x.h>
Expand Down Expand Up @@ -65,10 +65,11 @@ static PyObject *LzoError;
************************************************************************/

static /* const */ char compress__doc__[] =
"compress(string) -- Compress string using the default compression level, "
"returning a string containing compressed data.\n"
"compress(string, level) -- Compress string, using the chosen compression "
"level (either 1 or 9). Return a string containing the compressed data.\n"
"compress(string[,level[,header]]) -- Compress string, returning a string "
"containing compressed data.\n"
"level - Set compression level of either 1 (default) or 9.\n"
"header - Include metadata header for decompression in the output "
"(default: True).\n"
;

static PyObject *
Expand All @@ -78,19 +79,22 @@ compress(PyObject *dummy, PyObject *args)
lzo_voidp wrkmem = NULL;
const lzo_bytep in;
lzo_bytep out;
lzo_bytep outc;
lzo_uint in_len;
lzo_uint out_len;
lzo_uint new_len;
int len;
int level = 1;
int header = 1;
int err;

/* init */
UNUSED(dummy);
if (!PyArg_ParseTuple(args, "s#|i", &in, &len, &level))
if (!PyArg_ParseTuple(args, "s#|ii", &in, &len, &level, &header))
return NULL;
if (len < 0)
return NULL;

in_len = len;
out_len = in_len + in_len / 16 + 64 + 3;

Expand Down Expand Up @@ -118,16 +122,20 @@ compress(PyObject *dummy, PyObject *args)
#else
out = (lzo_bytep) PyString_AsString(result_str);
#endif

outc = header ? out+5 : out; // leave space for header if needed
new_len = out_len;
if (level == 1)
{
out[0] = 0xf0;
err = lzo1x_1_compress(in, in_len, out+5, &new_len, wrkmem);
if (header)
out[0] = 0xf0;
err = lzo1x_1_compress(in, in_len, outc, &new_len, wrkmem);
}
else
{
out[0] = 0xf1;
err = lzo1x_999_compress(in, in_len, out+5, &new_len, wrkmem);
if (header)
out[0] = 0xf1;
err = lzo1x_999_compress(in, in_len, outc, &new_len, wrkmem);
}
PyMem_Free(wrkmem);
if (err != LZO_E_OK || new_len > out_len)
Expand All @@ -138,19 +146,22 @@ compress(PyObject *dummy, PyObject *args)
return NULL;
}

/* save uncompressed length */
out[1] = (unsigned char) ((in_len >> 24) & 0xff);
out[2] = (unsigned char) ((in_len >> 16) & 0xff);
out[3] = (unsigned char) ((in_len >> 8) & 0xff);
out[4] = (unsigned char) ((in_len >> 0) & 0xff);
if (header) {
/* save uncompressed length */
out[1] = (unsigned char) ((in_len >> 24) & 0xff);
out[2] = (unsigned char) ((in_len >> 16) & 0xff);
out[3] = (unsigned char) ((in_len >> 8) & 0xff);
out[4] = (unsigned char) ((in_len >> 0) & 0xff);
}

/* return */
if (new_len != out_len)
#if PY_MAJOR_VERSION >= 3
_PyBytes_Resize(&result_str, 5 + new_len);
_PyBytes_Resize(&result_str, header ? new_len + 5 : new_len);
#else
_PyString_Resize(&result_str, 5 + new_len);
_PyString_Resize(&result_str, header ? new_len + 5 : new_len);
#endif

return result_str;
}

Expand All @@ -160,7 +171,10 @@ compress(PyObject *dummy, PyObject *args)
************************************************************************/

static /* const */ char decompress__doc__[] =
"decompress(string) -- Decompress the data in string, returning a string containing the decompressed data.\n"
"decompress(string[,header[,buflen]]) -- Decompress the data in string, returning a string containing the decompressed data.\n"
"header - Metadata header is included in input (default: True).\n"
"buflen - If header is False, a buffer length in bytes must be given that "
"will fit the output.\n"
;

static PyObject *
Expand All @@ -173,18 +187,28 @@ decompress(PyObject *dummy, PyObject *args)
lzo_uint out_len;
lzo_uint new_len;
int len;
int buflen = -1;
int header = 1;
int err;

/* init */
UNUSED(dummy);
if (!PyArg_ParseTuple(args, "s#", &in, &len))
if (!PyArg_ParseTuple(args, "s#|ii", &in, &len, &header, &buflen))
return NULL;
if (len < 5 + 3 || in[0] < 0xf0 || in[0] > 0xf1)
goto header_error;
in_len = len - 5;
out_len = (in[1] << 24) | (in[2] << 16) | (in[3] << 8) | in[4];
if ((int)out_len < 0 || in_len > out_len + out_len / 16 + 64 + 3)
goto header_error;
if (header) {
if (len < 5 + 3 || in[0] < 0xf0 || in[0] > 0xf1)
goto header_error;
out_len = (in[1] << 24) | (in[2] << 16) | (in[3] << 8) | in[4];
in_len = len - 5;
in += 5;
if ((int)out_len < 0 || in_len > out_len + out_len / 64 + 16 + 3)
goto header_error;
}
else {
if (buflen < 0) return PyErr_Format(LzoError, "Argument buflen required for headerless decompression");
out_len = buflen;
in_len = len;
}

/* alloc buffers */
#if PY_MAJOR_VERSION >= 3
Expand All @@ -202,14 +226,17 @@ decompress(PyObject *dummy, PyObject *args)
out = (lzo_bytep) PyString_AsString(result_str);
#endif
new_len = out_len;
err = lzo1x_decompress_safe(in+5, in_len, out, &new_len, NULL);
if (err != LZO_E_OK || new_len != out_len)
err = lzo1x_decompress_safe(in, in_len, out, &new_len, NULL);
if (err != LZO_E_OK || (header && new_len != out_len) )
{
Py_DECREF(result_str);
PyErr_Format(LzoError, "Compressed data violation %i", err);
return NULL;
}

if (!header && new_len != out_len)
_PyString_Resize(&result_str, new_len);

/* success */
return result_str;

Expand All @@ -224,7 +251,11 @@ decompress(PyObject *dummy, PyObject *args)
************************************************************************/

static /* const */ char optimize__doc__[] =
"optimize(string) -- Optimize the representation of the compressed data, returning a string containing the compressed data.\n"
"optimize(string[,header[,buflen]]) -- Optimize the representation of the "
"compressed data, returning a string containing the compressed data.\n"
"header - Metadata header is included in input (default: True).\n"
"buflen - If header is False, a buffer length in bytes must be given that "
"will fit the input/output.\n"
;

static PyObject *
Expand All @@ -238,17 +269,26 @@ optimize(PyObject *dummy, PyObject *args)
lzo_uint new_len;
int len;
int err;
int header = 1;
int buflen = -1;

/* init */
UNUSED(dummy);
if (!PyArg_ParseTuple(args, "s#", &in, &len))
if (!PyArg_ParseTuple(args, "s#|ii", &in, &len, &header, &buflen))
return NULL;
if (len < 5 + 3 || in[0] < 0xf0 || in[0] > 0xf1)
goto header_error;
in_len = len - 5;
out_len = (in[1] << 24) | (in[2] << 16) | (in[3] << 8) | in[4];
if ((int)out_len < 0 || in_len > out_len + out_len / 16 + 64 + 3)
goto header_error;
if (header) {
if (len < 5 + 3 || in[0] < 0xf0 || in[0] > 0xf1)
goto header_error;
in_len = len - 5;
out_len = (in[1] << 24) | (in[2] << 16) | (in[3] << 8) | in[4];
if ((int)out_len < 0 || in_len > out_len + out_len / 64 + 16 + 3)
goto header_error;
}
else {
if (buflen < 0) return PyErr_Format(LzoError, "Argument buflen required for headerless optimization");
out_len = buflen;
in_len = len;
}

/* alloc buffers */
#if PY_MAJOR_VERSION >= 3
Expand All @@ -272,9 +312,9 @@ optimize(PyObject *dummy, PyObject *args)
in = (lzo_bytep) PyString_AsString(result_str);
#endif
new_len = out_len;
err = lzo1x_optimize(in+5, in_len, out, &new_len, NULL);
err = lzo1x_optimize( header ? in+5 : in, in_len, out, &new_len, NULL);
PyMem_Free(out);
if (err != LZO_E_OK || new_len != out_len)
if (err != LZO_E_OK || (header && new_len != out_len))
{
Py_DECREF(result_str);
PyErr_Format(LzoError, "Compressed data violation %i", err);
Expand Down Expand Up @@ -370,14 +410,16 @@ static /* const */ PyMethodDef methods[] =
static /* const */ char module_documentation[]=
"The functions in this module allow compression and decompression "
"using the LZO library.\n\n"
"adler32(string) -- Compute an Adler-32 checksum.\n"
"adler32(string, start) -- Compute an Adler-32 checksum using a given starting value.\n"
"compress(string) -- Compress a string.\n"
"compress(string, level) -- Compress a string with the given level of compression (either 1 or 9).\n"
"crc32(string) -- Compute a CRC-32 checksum.\n"
"crc32(string, start) -- Compute a CRC-32 checksum using a given starting value.\n"
"decompress(string) -- Decompresses a compressed string.\n"
"optimize(string) -- Optimize a compressed string.\n"
"adler32(string) -- Compute an Adler-32 checksum.\n"
"adler32(string, start) -- Compute an Adler-32 checksum using a given starting value.\n"
"compress(string) -- Compress a string.\n"
"compress(string, ...) -- See help(lzo.compress) for more options.\n"
"crc32(string) -- Compute a CRC-32 checksum.\n"
"crc32(string, start) -- Compute a CRC-32 checksum using a given starting value.\n"
"decompress(string) -- Decompresses a compressed string.\n"
"decompress(string, ...) -- See help(lzo.decompress) for more options.\n"
"optimize(string) -- Optimize a compressed string.\n"
"optimize(string, ...) -- See help(lzo.optimize) for more options.\n"
;

#if PY_MAJOR_VERSION >= 3
Expand Down
22 changes: 22 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,23 @@ def test(src, level=1):
print("compressed %6d -> %6d" % (len(src), len(c)))


def testRaw(src, level=1):
a0 = lzo.adler32(src)
c = lzo.compress(src, level, False)
u1 = lzo.decompress(c, False, len(src))
a1 = lzo.adler32(u1)
o = lzo.optimize(c, False, len(src))
u2 = lzo.decompress(o, False, len(src))
a2 = lzo.adler32(u2)
# make sure it still works when you overstate the output buffer length
u3 = lzo.decompress(c, False, len(src) + 100)
if src != u1 or src != u2 or src != u3:
raise lzo.error("internal error 1")
if a0 != a1 or a0 != a2:
raise lzo.error("internal error 2")
print("compressed %6d -> %6d" % (len(src), len(c)))


def main(args):
# display version information and module documentation
print("LZO version %s (0x%x), %s" % (lzo.LZO_VERSION_STRING, lzo.LZO_VERSION, lzo.LZO_VERSION_DATE))
Expand All @@ -84,10 +101,15 @@ def main(args):

# compress some simple strings
test(b"aaaaaaaaaaaaaaaaaaaaaaaa")
testRaw(b"aaaaaaaaaaaaaaaaaaaaaaaa")
test(b"abcabcabcabcabcabcabcabc")
testRaw(b"abcabcabcabcabcabcabcabc")
test(b"abcabcabcabcabcabcabcabc", level=9)
testRaw(b"abcabcabcabcabcabcabcabc", level=9)
test(b" " * 131072)
testRaw(b" " * 131072)
test(b"")
testRaw(b"")
print("Simple compression test passed.")

# force an exception (because of invalid compressed data)
Expand Down

0 comments on commit 7f76307

Please sign in to comment.