addressing issue with 0 byte files in s3
ryandeivert committed Aug 4, 2020
1 parent ee1d202 · commit a37bf13
Showing 2 changed files with 17 additions and 7 deletions.
18 changes: 14 additions & 4 deletions streamalert/classifier/payload/s3.py
@@ -50,6 +50,8 @@ class S3PayloadError(Exception):
 class S3Payload(StreamPayload):
     """S3Payload class"""
 
+    MAX_S3_SIZE = 128 * 1024 * 1024
+
     @property
     def bucket(self):
         return self.raw_record['s3']['bucket']['name']
@@ -89,11 +91,18 @@ def _check_size(self):
         Returns:
             bool: True if the file is smaller than 128 MB, False otherwise
         """
-        # size == 0 or greater than 128MB
-        if self.size == 0 or (self.size > 128 * 1024 * 1024):
-            raise S3PayloadError('S3 object {}/{} has an invalid size and cannot be downloaded'
+        # Ignore 0 size files
+        if self.size == 0:
+            LOGGER.warning('S3 file size is 0 bytes, skipping: %s/%s', self.bucket, self.key)
+            return False
+
+        # size greater than 128MB
+        if self.size > self.MAX_S3_SIZE:
+            raise S3PayloadError('S3 object {}/{} is too large and cannot be downloaded '
                                  'from S3: {}'.format(self.bucket, self.key, self.display_size))
 
         return True
 
     @staticmethod
     def _cleanup():
         """Cleanup method to remove all objects in the Lambda container's temp directory"""
@@ -209,7 +218,8 @@ def _pre_parse(self):
         Yields:
             Instances of PayloadRecord back to the caller containing the current log data
         """
-        self._check_size()
+        if not self._check_size():
+            return  # _check_size can raise an exception as well
 
         line_num = 0
         for line_num, data in self._read_file():
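
For context, here is a minimal sketch (not part of this commit) of the record shape _check_size reads and the contract _pre_parse now relies on. The bucket/key/size field paths follow the standard S3 event notification layout that the bucket property and the unit tests reference; the concrete values below are hypothetical:

    # Hypothetical S3 event record; field paths match the standard S3 event
    # notification format used by the `bucket` property and the unit tests
    raw_record = {
        's3': {
            'bucket': {'name': 'example-bucket'},      # read by payload.bucket
            'object': {
                'key': 'logs/2020/08/04/empty.json',   # hypothetical key
                'size': 0,                             # a 0-byte object
            },
        }
    }

    # With this change, size == 0 makes _check_size() log a warning and return
    # False, so _pre_parse() simply yields nothing; sizes above MAX_S3_SIZE
    # (128 MB) still raise S3PayloadError.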
6 changes: 3 additions & 3 deletions tests/unit/streamalert/classifier/payload/test_payload_s3.py
@@ -81,10 +81,10 @@ def test_check_size_exception_large(self):
         self._payload.raw_record['s3']['object']['size'] = 1024 * 1024 * 129  # 129 MB
         assert_raises(S3PayloadError, self._payload._check_size)
 
-    def test_check_size_exception_zero(self):
-        """S3Payload - Check Size, Zero Raises Exception"""
+    def test_check_size_zero(self):
+        """S3Payload - Check Size, Zero"""
         self._payload.raw_record['s3']['object']['size'] = 0
-        assert_raises(S3PayloadError, self._payload._check_size)
+        assert_equal(self._payload._check_size(), False)
 
     def test_gz_reader(self):
         """S3Payload - GZ Reader"""
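
One edge worth noting: the new guard compares with a strict greater-than, so an object of exactly 128 MB is still accepted. A complementary boundary test, sketched here hypothetically in the style of the existing tests (it is not part of this commit), would pin that behavior down:

    def test_check_size_exact_limit(self):
        """S3Payload - Check Size, Exactly 128 MB (hypothetical)"""
        self._payload.raw_record['s3']['object']['size'] = 128 * 1024 * 1024
        assert_equal(self._payload._check_size(), True)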
