Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added support for remote tables in zipfiles.

  • Loading branch information...
commit 5bcc4fb933834dc9b2ed3d617279f846fc0304b2 (1 parent: 1aab7a6)
Jacob Kaplan-Moss authored

Showing 2 changed files with 28 additions and 2 deletions. Show diff stats Hide diff stats

  1. +20 -2 remotetable/api.py
  2. +8 -0 tests/__init__.py
22 remotetable/api.py
@@ -6,6 +6,11 @@
6 6 import urllib
7 7 import inspect
8 8 import itertools
  9 +import zipfile
  10 +try:
  11 + from cStringIO import StringIO
  12 +except ImportError:
  13 + from StringIO import StringIO
9 14 from . import parsers
10 15
11 16 def open(url, **kwargs):
@@ -20,7 +25,10 @@ def open(url, **kwargs):
20 25 if callable(parser):
21 26 pass
22 27 elif parser is None:
23   - parser = parsers.guess_parser(url)
  28 + if 'filename' in kwargs:
  29 + parser = parsers.guess_parser(kwargs['filename'])
  30 + else:
  31 + parser = parsers.guess_parser(url)
24 32 else:
25 33 parser = parsers.get_parser(parser)
26 34
@@ -51,7 +59,17 @@ def open(url, **kwargs):
51 59 # Grab the data.
52 60 with requests.settings(accept_gzip=False):
53 61 response = requests.request(**request_kwargs)
54   - results = parser(response.raw, **kwargs).read()
  62 +
  63 + # If we got a `filename` argument then assume this is a zipfile.
  64 + # XXX: would be nice to support gzip, etc.
  65 + if 'filename' in kwargs:
  66 + zf = zipfile.ZipFile(StringIO(response.content))
  67 + stream = zf.open(kwargs.pop('filename'))
  68 + else:
  69 + stream = response.raw
  70 +
  71 + # Now parse the stream.
  72 + results = parser(stream, **kwargs).read()
55 73
56 74 # Process select/omit.
57 75 if select_func:
8 tests/__init__.py
@@ -93,3 +93,11 @@ def test_invalid_named_parser(self):
93 93 def test_xls(self):
94 94 t = remotetable.open('http://cloud.github.com/downloads/seamusabshere/remote_table/remote_table_row_hash_test.alternate_order.xls')
95 95 self.assertEqual(t[0]['header2'], 'value2')
  96 +
  97 + def test_open_csv_inside_zipfile(self):
  98 + t = remotetable.open('http://www.epa.gov/climatechange/emissions/downloads10/2010-Inventory-Annex-Tables.zip',
  99 + filename = 'Annex Tables/Annex 3/Table A-93.csv',
  100 + skip = 1,
  101 + select = lambda row: row['Vehicle Age'].strip().isdigit()
  102 + )
  103 + self.assertEqual(t[0]['LDGV'], '9.09%')

0 comments on commit 5bcc4fb

Please sign in to comment.
Something went wrong with that request. Please try again.