Skip to content

Commit

Permalink
Try using mmap for a few buffers.
Browse files Browse the repository at this point in the history
  • Loading branch information
eerimoq committed Jan 28, 2020
1 parent ec5add5 commit ef5d232
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 8 deletions.
123 changes: 118 additions & 5 deletions detools/bsdiff.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ static int append_buffer(PyObject *list_p, uint8_t *buf_p, int32_t size)
}

static int parse_args(PyObject *args_p,
Py_ssize_t *suffix_array_length_p,
int32_t **sa_pp,
char **from_pp,
char **to_pp,
Expand All @@ -184,6 +183,7 @@ static int parse_args(PyObject *args_p,
PyObject *sa_p;
PyObject *from_bytes_p;
PyObject *to_bytes_p;
Py_ssize_t suffix_array_length;

res = PyArg_ParseTuple(args_p,
"OOO",
Expand All @@ -195,9 +195,9 @@ static int parse_args(PyObject *args_p,
return (-1);
}

*suffix_array_length_p = (PyByteArray_Size(sa_p) / sizeof(int32_t));
suffix_array_length = (PyByteArray_Size(sa_p) / sizeof(int32_t));

if (*suffix_array_length_p <= 0) {
if (suffix_array_length <= 0) {
return (-1);
}

Expand Down Expand Up @@ -466,10 +466,8 @@ static PyObject *m_create_patch(PyObject *self_p, PyObject *args_p)
int32_t *sa_p;
uint8_t *debuf_p;
PyObject *list_p;
Py_ssize_t suffix_array_length;

res = parse_args(args_p,
&suffix_array_length,
&sa_p,
(char **)&from_p,
(char **)&to_p,
Expand Down Expand Up @@ -520,9 +518,124 @@ static PyObject *m_create_patch(PyObject *self_p, PyObject *args_p)
return (NULL);
}

/**
 * Parse the (suffix array, from, to) argument tuple and acquire a
 * read-only, contiguous buffer view of each of the three objects.
 *
 * The suffix array buffer is required to hold at least
 * (length of from + 1) int32_t entries, which is the layout
 * sais_mmap() produces (entry 0 followed by one entry per from
 * byte). Smaller buffers are rejected so later indexing cannot run
 * past the end of the buffer.
 *
 * @param[in] args_p Argument tuple of three buffer objects.
 * @param[out] suffix_array_view_p View of the suffix array.
 * @param[out] from_view_p View of the from data.
 * @param[out] to_view_p View of the to data.
 *
 * @return 0 on success, in which case the caller owns all three views
 *         and must release each of them with PyBuffer_Release(). -1
 *         on failure, with a Python exception set and no view left
 *         acquired.
 */
static int parse_args_mmap(PyObject *args_p,
                           Py_buffer *suffix_array_view_p,
                           Py_buffer *from_view_p,
                           Py_buffer *to_view_p)
{
    int res;
    PyObject *suffix_array_mmap_p;
    PyObject *from_mmap_p;
    PyObject *to_mmap_p;
    Py_ssize_t suffix_array_length;

    res = PyArg_ParseTuple(args_p,
                           "OOO",
                           &suffix_array_mmap_p,
                           &from_mmap_p,
                           &to_mmap_p);

    if (res == 0) {
        return (-1);
    }

    res = PyObject_GetBuffer(suffix_array_mmap_p,
                             suffix_array_view_p,
                             PyBUF_CONTIG_RO);

    if (res == -1) {
        return (res);
    }

    res = PyObject_GetBuffer(from_mmap_p, from_view_p, PyBUF_CONTIG_RO);

    if (res == -1) {
        goto err1;
    }

    res = PyObject_GetBuffer(to_mmap_p, to_view_p, PyBUF_CONTIG_RO);

    if (res == -1) {
        goto err2;
    }

    /* Compare entry counts instead of byte counts so that the check
       itself cannot overflow Py_ssize_t. */
    suffix_array_length = (suffix_array_view_p->len
                           / (Py_ssize_t)sizeof(int32_t));

    if ((suffix_array_length < 1)
        || ((suffix_array_length - 1) < from_view_p->len)) {
        PyErr_SetString(PyExc_ValueError,
                        "Suffix array buffer too small for from data.");
        res = -1;
        goto err3;
    }

    return (res);

 err3:
    PyBuffer_Release(to_view_p);

 err2:
    PyBuffer_Release(from_view_p);

 err1:
    PyBuffer_Release(suffix_array_view_p);

    return (res);
}

/**
 * def create_patch_mmap(suffix_array_mmap, from_mmap, to_mmap) -> list
 *
 * Same job as create_patch(), but the three arguments are accessed
 * through the buffer protocol (for example mmap objects) instead of
 * being required to be bytearray/bytes objects.
 *
 * Returns the list filled in by create_patch_loop(), or NULL with a
 * Python exception set on failure.
 */
static PyObject *m_create_patch_mmap(PyObject *self_p, PyObject *args_p)
{
    int res;
    uint8_t *debuf_p;
    PyObject *list_p;
    Py_buffer suffix_array_view;
    Py_buffer from_view;
    Py_buffer to_view;

    res = parse_args_mmap(args_p,
                          &suffix_array_view,
                          &from_view,
                          &to_view);

    if (res != 0) {
        return (NULL);
    }

    /* Scratch buffer handed to create_patch_loop(), one byte larger
       than the to data. */
    debuf_p = PyMem_Malloc(to_view.len + 1);

    if (debuf_p == NULL) {
        /* PyMem_Malloc() does not set an exception on failure, and
           returning NULL without one makes the interpreter raise
           SystemError instead of MemoryError. */
        PyErr_NoMemory();
        goto err1;
    }

    list_p = PyList_New(0);

    if (list_p == NULL) {
        goto err2;
    }

    res = create_patch_loop(list_p,
                            suffix_array_view.buf,
                            from_view.buf,
                            from_view.len,
                            to_view.buf,
                            to_view.len,
                            debuf_p);

    if (res != 0) {
        goto err3;
    }

    PyMem_Free(debuf_p);
    PyBuffer_Release(&suffix_array_view);
    PyBuffer_Release(&from_view);
    PyBuffer_Release(&to_view);

    return (list_p);

 err3:
    Py_DECREF(list_p);

 err2:
    PyMem_Free(debuf_p);

 err1:
    /* All three views were acquired by parse_args_mmap(). */
    PyBuffer_Release(&suffix_array_view);
    PyBuffer_Release(&from_view);
    PyBuffer_Release(&to_view);

    return (NULL);
}

/* Method table of this module. Every PyMethodDef field is spelled
   out; the all-zero entry is the end-of-table sentinel. */
static PyMethodDef module_methods[] = {
    { "pack_size", m_pack_size, METH_O, NULL },
    { "create_patch", m_create_patch, METH_VARARGS, NULL },
    { "create_patch_mmap", m_create_patch_mmap, METH_VARARGS, NULL },
    { NULL, NULL, 0, NULL }
};

Expand Down
46 changes: 43 additions & 3 deletions detools/create.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import tempfile
import mmap
import lzma
from bz2 import BZ2Compressor
from io import BytesIO
Expand Down Expand Up @@ -50,6 +52,30 @@ def create_compressor(compression):
return compressor


def zero_fill_file(fout, size):
    """Write `size` zero bytes to `fout` and flush it.

    The data is written in blocks of at most 4096 bytes; only the
    final block may be shorter. Nothing is written when `size` is
    zero or negative, but `fout` is still flushed.

    """

    block = b'\x00' * 4096
    remaining = size

    while remaining > 0:
        count = min(remaining, len(block))
        fout.write(block[:count])
        remaining -= count

    fout.flush()


def mmap_read_only(fin):
    """Return a read-only memory map covering the whole of the open
    file `fin`.

    """

    fileno = fin.fileno()

    # Length 0 maps the entire file.
    return mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)


def mmap_read_write(fin):
    """Return a memory map covering the whole of the open file `fin`,
    created with the default access mode of mmap.mmap().

    """

    fileno = fin.fileno()

    # Length 0 maps the entire file.
    return mmap.mmap(fileno, 0)


def create_patch_normal_data(ffrom,
fto,
fpatch,
Expand Down Expand Up @@ -83,9 +109,23 @@ def create_patch_normal_data(ffrom,
dfpatch += patch

fpatch.write(compressor.compress(dfpatch))
from_data = file_read(ffrom)
suffix_array = sais.sais(from_data)
chunks = bsdiff.create_patch(suffix_array, from_data, file_read(fto))

try:
with mmap_read_only(ffrom) as from_mmap:
with mmap_read_only(fto) as to_mmap:
with tempfile.TemporaryFile() as fsuffix_array:
zero_fill_file(fsuffix_array, 4 * (file_size(ffrom) + 1))

with mmap_read_write(fsuffix_array) as suffix_array_mmap:
sais.sais_mmap(from_mmap, suffix_array_mmap)
chunks = bsdiff.create_patch_mmap(suffix_array_mmap,
from_mmap,
to_mmap)
except Exception:
print('Failed to use mmap.')
from_data = file_read(ffrom)
suffix_array = sais.sais(from_data)
chunks = bsdiff.create_patch(suffix_array, from_data, file_read(fto))

# with open('data-to.patch', 'wb') as fout:
# for i in range(0, len(chunks), 5):
Expand Down
64 changes: 64 additions & 0 deletions detools/sais.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,72 @@ static PyObject *m_sais(PyObject *self_p, PyObject* arg_p)
return (NULL);
}

/**
 * def sais_mmap(from_mmap, suffix_array_mmap) -> None
 *
 * Builds the suffix array of from_mmap in place in suffix_array_mmap,
 * which must be a writable, contiguous buffer with room for at least
 * (len(from_mmap) + 1) 32 bits integers. Entry 0 is set to
 * len(from_mmap) and the output of sais() is stored from entry 1 and
 * onwards.
 *
 * Raises ValueError if from_mmap is too big for 32 bits indexing or
 * if suffix_array_mmap is too small.
 */
static PyObject *m_sais_mmap(PyObject *self_p, PyObject* args_p)
{
    int res;
    Py_buffer from_mmap_view;
    Py_buffer suffix_array_mmap_view;
    PyObject *from_mmap_p;
    PyObject *suffix_array_mmap_p;
    int32_t *suffix_array_p;
    Py_ssize_t suffix_array_length;

    res = PyArg_ParseTuple(args_p,
                           "OO",
                           &from_mmap_p,
                           &suffix_array_mmap_p);

    if (res == 0) {
        return (NULL);
    }

    /* Input argument conversion. */
    res = PyObject_GetBuffer(from_mmap_p, &from_mmap_view, PyBUF_CONTIG_RO);

    if (res == -1) {
        return (NULL);
    }

    res = PyObject_GetBuffer(suffix_array_mmap_p,
                             &suffix_array_mmap_view,
                             PyBUF_CONTIG);

    if (res == -1) {
        goto err1;
    }

    /* The length is stored in, and passed to sais() as, an int32_t,
       so anything bigger would be silently truncated. */
    if (from_mmap_view.len > INT32_MAX) {
        PyErr_SetString(PyExc_ValueError, "From data too big.");
        goto err2;
    }

    /* Compare entry counts instead of byte counts so that the check
       itself cannot overflow Py_ssize_t. */
    suffix_array_length = (suffix_array_mmap_view.len
                           / (Py_ssize_t)sizeof(int32_t));

    if ((suffix_array_length < 1)
        || ((suffix_array_length - 1) < from_mmap_view.len)) {
        PyErr_SetString(PyExc_ValueError,
                        "Suffix array buffer too small for from data.");
        goto err2;
    }

    suffix_array_p = (int32_t *)suffix_array_mmap_view.buf;
    suffix_array_p[0] = (int32_t)from_mmap_view.len;

    /* Execute the SA-IS algorithm. */
    res = sais((uint8_t *)from_mmap_view.buf,
               &suffix_array_p[1],
               (int32_t)from_mmap_view.len);

    if (res != 0) {
        /* Never return NULL without an exception set. */
        if (PyErr_Occurred() == NULL) {
            PyErr_SetString(PyExc_RuntimeError, "SA-IS failed.");
        }

        goto err2;
    }

    PyBuffer_Release(&from_mmap_view);
    PyBuffer_Release(&suffix_array_mmap_view);

    Py_RETURN_NONE;

 err2:
    PyBuffer_Release(&suffix_array_mmap_view);

 err1:
    PyBuffer_Release(&from_mmap_view);

    return (NULL);
}

/* Method table of this module. Every PyMethodDef field is spelled
   out; the all-zero entry is the end-of-table sentinel. */
static PyMethodDef module_methods[] = {
    { "sais", m_sais, METH_O, NULL },
    { "sais_mmap", m_sais_mmap, METH_VARARGS, NULL },
    { NULL, NULL, 0, NULL }
};

Expand Down

0 comments on commit ef5d232

Please sign in to comment.