Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #360 from alphagov/feature/gzip-bomb
Browse files Browse the repository at this point in the history
Defend against zip bombs
  • Loading branch information
robyoung committed Sep 23, 2014
2 parents 4727906 + d71b451 commit 4ec66ce
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 5 deletions.
34 changes: 31 additions & 3 deletions backdrop/write/decompressing_request.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import gzip

from flask import current_app, g
from flask import current_app, g, abort
from flask.wrappers import Request
from io import BytesIO

Expand Down Expand Up @@ -34,8 +34,7 @@ def get_data(self, *args, **kwargs):

gzipped_content = BytesIO(bytes)

decompressed_content = gzip.GzipFile(mode='rb',
fileobj=gzipped_content)
decompressed_content = SafeGzipDecompressor(gzipped_content)

data = decompressed_content.read().decode('utf-8')

Expand All @@ -47,3 +46,32 @@ def get_data(self, *args, **kwargs):
g._has_decompressed_entity = True

return super(DecompressingRequest, self).get_data(*args, **kwargs)


class SafeGzipDecompressor(object):
    """Decompress a gzip stream while guarding against zip bombs.

    Decompression is done incrementally and aborted with an HTTP 413
    (Request Entity Too Large) as soon as the decompressed data exceeds
    ``maxsize``, so a tiny compressed payload cannot expand into an
    unbounded amount of memory.
    See http://en.wikipedia.org/wiki/Zip_bomb
    """
    # Decompress in 8 KiB chunks so the size check fires long before the
    # whole stream has been expanded in memory.
    blocksize = 8 * 1024

    def __init__(self, fileobj, maxsize=10 * 1024 * 1024):
        """fileobj: file-like object containing the gzip-compressed data.
        maxsize: decompressed-size limit in bytes (default 10 MiB).
        """
        self.maxsize = maxsize
        self.gzipobj = gzip.GzipFile(mode='rb', fileobj=fileobj)

    def read(self):
        """Return the fully decompressed payload as bytes.

        Aborts the request with HTTP 413 if the decompressed data grows
        beyond ``maxsize``.
        """
        chunks = []
        total = 0
        while True:
            chunk = self.gzipobj.read(self.blocksize)
            if not chunk:
                break
            chunks.append(chunk)
            total += len(chunk)

            if total > self.maxsize:
                # Decompressed payload is too large; refuse the request
                # rather than exhausting memory.
                abort(413)

        # GzipFile.read() yields bytes, so join with a bytes separator.
        # (The original str "" seed/join breaks under Python 3; b"" is
        # identical to "" under Python 2, so behavior is unchanged there.)
        return b"".join(chunks)
Binary file added features/fixtures/gzip-bomb.gz
Binary file not shown.
12 changes: 10 additions & 2 deletions features/steps/write_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def step(context, token):
context.bearer_token = token


@given(u'I have compressed content')
def step(context):
    # Flag on the shared behave context that subsequent requests in this
    # scenario carry a gzip-compressed body; _make_headers_from_context
    # reads this flag to add a "Content-Encoding: gzip" header.
    context.content_encoding = True


@when('I "{http_method}" the compressed request body to the path "{path}"')
def step(context, http_method, path):
assert http_method in ('POST', 'PUT'), "Only support POST, PUT"
Expand Down Expand Up @@ -157,9 +162,12 @@ def step(context, collection, size):


def _make_headers_from_context(context):
result = []
if context and 'bearer_token' in context:
return [('Authorization', "Bearer %s" % context.bearer_token)]
return []
result.append(('Authorization', "Bearer %s" % context.bearer_token))
if context and 'content_encoding' in context:
result.append(('Content-Encoding', u'gzip'))
return result


def _make_malformed_header_from_context(context):
Expand Down
11 changes: 11 additions & 0 deletions features/write_api/write_api.feature
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ Feature: the performance platform write api
and I use the bearer token for the data_set
when I "POST" the compressed request body to the path "/data/group/type"
then I should get back a status of "200"

Scenario: posting gzip-bomb payload to a data-set
Given I have the data in "gzip-bomb.gz"
and I have a data_set named "data_with_times" with settings
| key | value |
| data_group | "group" |
| data_type | "type" |
and I use the bearer token for the data_set
and I have compressed content
when I POST to the specific path "/data/group/type"
then I should get back a status of "413"

0 comments on commit 4ec66ce

Please sign in to comment.