Skip to content

Commit

Permalink
use less memory for bz2 decompression
Browse files Browse the repository at this point in the history
  • Loading branch information
jbremer committed Sep 12, 2018
1 parent 5d511c9 commit e48d3e5
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 6 deletions.
2 changes: 2 additions & 0 deletions sflock/abstracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def from_path(self, filepath, relapath=None, filename=None,
)

def temp_path(self, suffix=""):
    """Copy this object's stream into a new temporary file.

    The file is created with ``tempfile.mkstemp`` and is not removed
    here; the caller is responsible for deleting it when done.

    :param suffix: filename suffix handed to ``tempfile.mkstemp``.
    :return: path of the newly written temporary file.
    """
    # TODO Depending on use-case we may not need a full copy. Perhaps
    # abstract away the "if self.f.filepath ... else ..." logic?
    fd, filepath = tempfile.mkstemp(suffix=suffix)
    # Close the handle explicitly so that all data is flushed to disk and
    # the descriptor is released before the path is handed to the caller.
    with os.fdopen(fd, "wb") as f:
        shutil.copyfileobj(self.stream, f)
    return filepath
Expand Down
44 changes: 42 additions & 2 deletions sflock/unpack/tar.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@

import bz2
import gzip
import os
import six
import tarfile
import tempfile

from sflock.abstracts import Unpacker, File
from sflock.config import MAX_TOTAL_SIZE
Expand Down Expand Up @@ -83,9 +85,47 @@ def handles(self):
if not self.f.filesize:
return False

fd, filepath = tempfile.mkstemp()
os.write(fd, self.f.stream.read(0x1000))
os.close(fd)

d = bz2.BZ2File(filepath, "r")

os.unlink(filepath)

try:
f = File(contents=bz2.decompress(self.f.contents))
if d.read(0x1000):
return True
except IOError:
return False

return self.magic in f.magic
def unpack(self, password=None, duplicates=None):
    """Decompress the bz2 stream chunk by chunk into a temporary directory.

    The data is read in 0x10000-byte chunks and written to a file named
    "output" inside a fresh temporary directory, so the decompressed
    content is never held in memory as a whole. Decompression stops once
    MAX_TOTAL_SIZE bytes have been written.

    :param password: accepted for interface compatibility; not used here.
    :param duplicates: passed through to ``process_directory``.
    :return: an empty list (with ``self.f.error`` set to
        "files_too_large") when the size limit is reached, otherwise the
        result of ``process_directory`` on the temporary directory.
    """
    dirpath = tempfile.mkdtemp()

    # BZ2File wants a path on disk; materialize the stream if needed.
    if not self.f.filepath:
        filepath = self.f.temp_path(".bz2")
        temporary = True
    else:
        filepath = self.f.filepath
        temporary = False

    try:
        with open(os.path.join(dirpath, "output"), "wb") as f:
            with bz2.BZ2File(filepath, "r") as d:
                while f.tell() < MAX_TOTAL_SIZE:
                    try:
                        buf = d.read(0x10000)
                    except (IOError, EOFError):
                        # Corrupt data raises IOError, a truncated stream
                        # raises EOFError; keep what was decompressed.
                        break
                    if not buf:
                        break
                    f.write(buf)
            # Remember the size before the handle is closed; closing also
            # guarantees the output is flushed before it is processed.
            written = f.tell()
    finally:
        # Remove our private copy even when decompression blows up.
        if temporary:
            os.unlink(filepath)

    if written >= MAX_TOTAL_SIZE:
        self.f.error = "files_too_large"
        return []

    return self.process_directory(dirpath, duplicates)
5 changes: 2 additions & 3 deletions sflock/unpack/zip7.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Zip7File(Unpacker):
magic = "7-zip archive", "# ISO 9660"

def unpack(self, password=None, duplicates=None):
dirpath = tempfile.mkdtemp().encode()
dirpath = tempfile.mkdtemp()

if password:
raise UnpackException(
Expand All @@ -35,8 +35,7 @@ def unpack(self, password=None, duplicates=None):
temporary = True

ret = self.zipjail(
filepath, dirpath, "x", "-mmt=off",
"-o%s" % dirpath.decode(), filepath
filepath, dirpath, "x", "-mmt=off", "-o%s" % dirpath, filepath
)
if not ret:
return []
Expand Down
6 changes: 5 additions & 1 deletion tests/test_tar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2015-2018 Jurriaan Bremer.
# Copyright (C) 2018 Hatching B.V.
# This file is part of SFlock - http://www.sflock.org/.
# See the file 'docs/LICENSE.txt' for copying permission.

Expand Down Expand Up @@ -68,6 +69,8 @@ def test_tar_plain2_bz2(self):
assert t.handles() is True
assert not t.f.selected
files = list(t.unpack())
assert len(files) == 1
files = files[0].children
assert len(files) == 2
assert files[0].relapath == b"sflock.txt"
assert files[0].contents == b"sflock_plain_tar\n"
Expand Down Expand Up @@ -102,7 +105,8 @@ def test_nested_bzip2(self):
assert not t.f.selected
files = list(t.unpack())
assert len(files) == 1

files = files[0].children
assert len(files) == 1
assert files[0].relapath == b"foo/bar.txt"
assert files[0].parentdirs == [b"foo"]
assert files[0].contents == b"hello world\n"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def test_unpack1():
def test_unpack2():
f = unpack(b"tests/files/tar_nested.tar.bz2")
assert len(f.children) == 1
f = f.children[0]
assert len(f.children) == 1
assert f.children[0].relapath == b"foo/bar.txt"
assert f.children[0].relaname == b"foo/bar.txt"
assert f.children[0].contents == b"hello world\n"
Expand Down

0 comments on commit e48d3e5

Please sign in to comment.