Skip to content
This repository has been archived by the owner on Dec 15, 2018. It is now read-only.

Commit

Permalink
Limit size of the artifacts to be processed.
Browse files Browse the repository at this point in the history
Summary:
Added get_size to FileStorage and removed TODO to limit file size
in get_file of ArtifactStoreFileStorage. It'll be up to the caller
to check the size first.
Changed the artifacts manager to check artifact sizes before attempting
to process them.

A next TODO is to make it clearer what the malformed issue actually is
(eg size of the artifact, name of artifact).

Test Plan: Added test for new artifacts manager behavior

Reviewers: kylec, andrewhe

Reviewed By: andrewhe

Subscribers: changesbot, stacey

Differential Revision: https://tails.corp.dropbox.com/D220989
  • Loading branch information
paulruan committed Aug 17, 2016
1 parent a1a88a9 commit 3fe4054
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 9 deletions.
1 change: 1 addition & 0 deletions changes/artifacts/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class ArtifactParseError(Exception):

class ArtifactHandler(object):
FILENAMES = ()
MAX_ARTIFACT_BYTES = 50 * 1024 * 1024
logger = logging.getLogger('artifacts')

def __init__(self, step):
Expand Down
6 changes: 5 additions & 1 deletion changes/artifacts/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ def process(self, artifact, fp=None):
artifact_name = artifact.name
for cls in self.handlers:
if cls.can_process(artifact_name):
handler = cls(step)
size = artifact.file.get_size()
if size > cls.MAX_ARTIFACT_BYTES:
handler.report_malformed()
continue
if not fp:
fp = artifact.file.get_file()
handler = cls(step)
try:
handler.process(fp, artifact)
finally:
Expand Down
6 changes: 5 additions & 1 deletion changes/db/types/filestorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,14 @@ def save(self, fp, filename=None, content_type=None, path=None):
self.set_filename(filename)
if self.filename is None:
raise ValueError('Missing filename')

filename = self.get_storage().save(self.filename, fp, content_type, path)
self.set_filename(filename)

def get_size(self):
if self.filename is None:
raise ValueError('Missing filename')
return self.get_storage().get_size(self.filename)

def get_file(self, offset=None, length=None):
if self.filename is None:
raise ValueError('Missing filename')
Expand Down
5 changes: 4 additions & 1 deletion changes/storage/artifactstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ def url_for(self, filename):
filename=filename
)

def get_size(self, filename):
bucket_name, artifact_name = ArtifactStoreFileStorage.get_artifact_name_from_filename(filename)
return ArtifactStoreClient(self.base_url).get_artifact(bucket_name, artifact_name).size

def get_file(self, filename, offset=None, length=None):
# TODO(paulruan): Have a reasonable file size limit
bucket_name, artifact_name = ArtifactStoreFileStorage.get_artifact_name_from_filename(filename)
return ArtifactStoreClient(self.base_url) \
.get_artifact_content(bucket_name, artifact_name, offset, length)
3 changes: 3 additions & 0 deletions changes/storage/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ def url_for(self, filename, expire=300):

def get_file(self, filename, offset=None, length=None):
raise NotImplementedError

def get_size(self, filename):
raise NotImplementedError
7 changes: 7 additions & 0 deletions changes/storage/mock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import absolute_import

import os

from cStringIO import StringIO

from changes.storage.base import FileStorage
Expand All @@ -24,6 +26,11 @@ def save(self, filename, fp, content_type=None, path=None):
def url_for(self, filename, expire=300):
return 'url-not-implemented-for-filestoragecache'

def get_size(self, filename):
fp = StringIO(_cache[filename]['content'])
fp.seek(0, os.SEEK_END)
return fp.tell()

def get_file(self, filename, offset=None, length=None):
start_offset, end_offset = None, None
if offset is not None:
Expand Down
4 changes: 4 additions & 0 deletions changes/storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ def url_for(self, filename, expire=300):
key = self.bucket.get_key(self.get_file_path(filename))
return key.generate_url(300)

def get_size(self, filename):
key = self.bucket.get_key(self.get_file_path(filename))
return key.size

def get_file(self, filename, offset=None, length=None):
headers = {}
if offset is not None:
Expand Down
7 changes: 7 additions & 0 deletions changes/testutils/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,10 @@ def create_adminmessage(self, **kwargs):
db.session.add(message)
db.session.commit()
return message

def create_any_jobstep(self):
project = self.create_project()
build = self.create_build(project)
job = self.create_job(build)
jobphase = self.create_jobphase(job)
return self.create_jobstep(jobphase)
46 changes: 40 additions & 6 deletions tests/changes/artifacts/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@ class ManagerTest(TestCase):
def test_process_behavior(self, process):
class _CovHandler(ArtifactHandler):
FILENAMES = ('coverage.xml',)
MAX_ARTIFACT_SIZE = 200

class _OtherHandler(ArtifactHandler):
FILENAMES = ('/other.xml',)
MAX_ARTIFACT_SIZE = 200

manager = Manager([_CovHandler, _OtherHandler])

project = self.create_project()
build = self.create_build(project)
job = self.create_job(build)
jobphase = self.create_jobphase(job)
jobstep = self.create_jobstep(jobphase)
jobstep = self.create_any_jobstep()

artifact = self.create_artifact(
step=jobstep,
Expand Down Expand Up @@ -70,6 +67,43 @@ class _OtherHandler(ArtifactHandler):

assert process.call_count == 3

@mock.patch.object(ArtifactHandler, 'process')
@mock.patch.object(ArtifactHandler, 'report_malformed')
def test_process_too_large(self, report_malformed, process):
file_sizes = {
'large/size.xml': 1000,
'medium/size.xml': 500,
'small/size.xml': 10,
}

class _Handler(ArtifactHandler):
FILENAMES = ('size.xml',)
MAX_ARTIFACT_BYTES = 500

manager = Manager([_Handler])
jobstep = self.create_any_jobstep()

for name, size in file_sizes.iteritems():
process.reset_mock()
report_malformed.reset_mock()
artifact = self.create_artifact(
step=jobstep,
name=name,
)
artifact.file.save(StringIO('a' * size), artifact.name)
if size > _Handler.MAX_ARTIFACT_BYTES:
manager.process(artifact)
assert not process.called, \
"Artifact %s should not have been processed" % (artifact.name,)
assert report_malformed.called, \
"Artifact %s should have been reported as malformed" % (artifact.name,)
else:
manager.process(artifact)
assert process.called, \
"Artifact %s should have been processed" % (artifact.name,)
assert not report_malformed.called, \
"Artifact %s should not have been reported as malformed" % (artifact.name,)

def test_can_process(self):
class _CovHandler(ArtifactHandler):
FILENAMES = ('coverage.xml',)
Expand Down

0 comments on commit 3fe4054

Please sign in to comment.