Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #360 from alphagov/feature/gzip-bomb
Browse files Browse the repository at this point in the history
Defend against zip bombs
  • Loading branch information
robyoung committed Sep 23, 2014
2 parents 4727906 + d71b451 commit 4ec66ce
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 5 deletions.
34 changes: 31 additions & 3 deletions backdrop/write/decompressing_request.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import gzip

from flask import current_app, g
from flask import current_app, g, abort
from flask.wrappers import Request
from io import BytesIO

Expand Down Expand Up @@ -34,8 +34,7 @@ def get_data(self, *args, **kwargs):

gzipped_content = BytesIO(bytes)

decompressed_content = gzip.GzipFile(mode='rb',
fileobj=gzipped_content)
decompressed_content = SafeGzipDecompressor(gzipped_content)

data = decompressed_content.read().decode('utf-8')

Expand All @@ -47,3 +46,32 @@ def get_data(self, *args, **kwargs):
g._has_decompressed_entity = True

return super(DecompressingRequest, self).get_data(*args, **kwargs)


class SafeGzipDecompressor(object):
    """Decompress a gzip stream while guarding against zip bombs.

    Decompression is done incrementally and aborted with an HTTP 413
    (Request Entity Too Large) as soon as the decompressed data exceeds
    ``maxsize``, so a tiny compressed payload cannot expand into an
    unbounded amount of memory.
    See http://en.wikipedia.org/wiki/Zip_bomb
    """
    # Decompress in 8 KiB chunks so the size check fires long before the
    # whole stream has been expanded in memory.
    blocksize = 8 * 1024

    def __init__(self, fileobj, maxsize=10 * 1024 * 1024):
        """fileobj: file-like object containing the gzip-compressed data.
        maxsize: decompressed-size limit in bytes (default 10 MiB).
        """
        self.maxsize = maxsize
        self.gzipobj = gzip.GzipFile(mode='rb', fileobj=fileobj)

    def read(self):
        """Return the fully decompressed payload as bytes.

        Aborts the request with HTTP 413 if the decompressed data grows
        beyond ``maxsize``.
        """
        chunks = []
        total = 0
        while True:
            chunk = self.gzipobj.read(self.blocksize)
            if not chunk:
                break
            chunks.append(chunk)
            total += len(chunk)

            if total > self.maxsize:
                # Decompressed payload is too large; refuse the request
                # rather than exhausting memory.
                abort(413)

        # GzipFile.read() yields bytes, so join with a bytes separator.
        # (The original str "" seed/join breaks under Python 3; b"" is
        # identical to "" under Python 2, so behavior is unchanged there.)
        return b"".join(chunks)
Binary file added features/fixtures/gzip-bomb.gz
Binary file not shown.
12 changes: 10 additions & 2 deletions features/steps/write_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def step(context, token):
context.bearer_token = token


@given(u'I have compressed content')
def step(context):
    # Flag on the shared behave context that subsequent requests in this
    # scenario carry a gzip-compressed body; _make_headers_from_context
    # reads this flag to add a "Content-Encoding: gzip" header.
    context.content_encoding = True


@when('I "{http_method}" the compressed request body to the path "{path}"')
def step(context, http_method, path):
assert http_method in ('POST', 'PUT'), "Only support POST, PUT"
Expand Down Expand Up @@ -157,9 +162,12 @@ def step(context, collection, size):


def _make_headers_from_context(context):
result = []
if context and 'bearer_token' in context:
return [('Authorization', "Bearer %s" % context.bearer_token)]
return []
result.append(('Authorization', "Bearer %s" % context.bearer_token))
if context and 'content_encoding' in context:
result.append(('Content-Encoding', u'gzip'))
return result


def _make_malformed_header_from_context(context):
Expand Down
11 changes: 11 additions & 0 deletions features/write_api/write_api.feature
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,14 @@ Feature: the performance platform write api
and I use the bearer token for the data_set
when I "POST" the compressed request body to the path "/data/group/type"
then I should get back a status of "200"

Scenario: posting gzip-bomb payload to a data-set
Given I have the data in "gzip-bomb.gz"
and I have a data_set named "data_with_times" with settings
| key | value |
| data_group | "group" |
| data_type | "type" |
and I use the bearer token for the data_set
and I have compressed content
when I POST to the specific path "/data/group/type"
then I should get back a status of "413"

0 comments on commit 4ec66ce

Please sign in to comment.