Skip to content

Commit

Permalink
[#3390] factor out csv/tsv writing into context managers
Browse files Browse the repository at this point in the history
  • Loading branch information
wardi committed Jan 6, 2017
1 parent 417f1ea commit da3b957
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 40 deletions.
67 changes: 27 additions & 40 deletions ckanext/datastore/controller.py
@@ -1,7 +1,6 @@
# encoding: utf-8

import StringIO
import unicodecsv as csv

import pylons

Expand All @@ -16,11 +15,14 @@
BaseController,
abort,
)
from ckanext.datastore.writer import (
csv_writer,
tsv_writer,
)

int_validator = get_validator('int_validator')
boolean_validator = get_validator('boolean_validator')

UTF8_BOM = u'\uFEFF'.encode('utf-8')
DUMP_FORMATS = 'csv', 'tsv'
PAGINATE_BY = 10000

Expand All @@ -38,36 +40,17 @@ def dump(self, resource_id):
bom = boolean_validator(request.GET.get('bom'), {})
fmt = request.GET.get('format', 'csv')

def start_writer():
def start_writer(columns):
if fmt == 'csv':
response.headers['Content-Type'] = 'text/csv; charset=utf-8'
response.headers['Content-disposition'] = (
'attachment; filename="{name}.csv"'.format(
name=resource_id))
wr = csv.writer(response, encoding='utf-8')
elif fmt == 'tsv':
response.headers['Content-Type'] = (
'text/tab-separated-values; charset=utf-8')
response.headers['Content-disposition'] = (
'attachment; filename="{name}.tsv"'.format(
name=resource_id))
wr = csv.writer(
response, encoding='utf-8', dialect=csv.excel_tab)
else:
abort(400,
_(u'format: must be one of %s') % u', '.join(DUMP_FORMATS))

if bom:
response.write(UTF8_BOM)
return wr

wr = None
while True:
if limit is not None and limit <= 0:
break
return csv_writer(response, columns, resource_id, bom)
if fmt == 'tsv':
return tsv_writer(response, columns, resource_id, bom)
abort(400, _(
u'format: must be one of %s') % u', '.join(DUMP_FORMATS))

def result_page(offset, limit):
try:
result = get_action('datastore_search')(None, {
return get_action('datastore_search')(None, {
'resource_id': resource_id,
'limit':
PAGINATE_BY if limit is None
Expand All @@ -77,17 +60,21 @@ def start_writer():
except ObjectNotFound:
abort(404, _('DataStore resource not found'))

if not wr:
wr = start_writer()
result = result_page(offset, limit)
columns = [x['id'] for x in result['fields']]

with start_writer(columns) as wr:
while True:
if limit is not None and limit <= 0:
break

header = [x['id'] for x in result['fields']]
wr.writerow(header)
for record in result['records']:
wr.writerow([record[column] for column in columns])

for record in result['records']:
wr.writerow([record[column] for column in header])
if len(result['records']) < PAGINATE_BY:
break
offset += PAGINATE_BY
if limit is not None:
limit -= PAGINATE_BY

if len(result['records']) < PAGINATE_BY:
break
offset += PAGINATE_BY
if limit is not None:
limit -= PAGINATE_BY
result = result_page(offset, limit)
61 changes: 61 additions & 0 deletions ckanext/datastore/writer.py
@@ -0,0 +1,61 @@
from contextlib import contextmanager

import unicodecsv

# UTF-8 byte-order mark (U+FEFF encoded as UTF-8); the writers below emit it
# ahead of the header row when called with bom=True.
UTF8_BOM = u'\uFEFF'.encode('utf-8')


@contextmanager
def csv_writer(response, columns, name=None, bom=False):
    u'''Context manager yielding a UTF-8 CSV writer bound to ``response``.

    On entry the HTTP headers are set (only when ``response`` has a
    ``headers`` attribute), the optional BOM is written, and the header
    row built from ``columns`` is emitted. Nothing is done on exit.

    :param response: file-like or response-like object to write to;
        headers are only set on objects exposing ``headers``
    :param columns: list of column names, written as the first row
    :param name: base file name for the Content-disposition header
        (ignored for objects without ``headers``)
    :param bom: True to write a UTF-8 BOM before the header row

    Usage::

        with csv_writer(response, fields) as wr:
            wr.writerow(row1)
            wr.writerow(row2)
    '''
    is_response_like = hasattr(response, 'headers')
    if is_response_like:
        response.headers['Content-Type'] = 'text/csv; charset=utf-8'
    if is_response_like and name:
        disposition = 'attachment; filename="{name}.csv"'.format(name=name)
        response.headers['Content-disposition'] = disposition

    writer = unicodecsv.writer(response, encoding='utf-8')
    if bom:
        # The BOM has to precede the header row in the output.
        response.write(UTF8_BOM)
    writer.writerow(columns)
    yield writer


@contextmanager
def tsv_writer(response, columns, name=None, bom=False):
    u'''Context manager for writing UTF-8 TSV data to response

    On entry the HTTP headers are set (only when ``response`` has a
    ``headers`` attribute), the optional BOM is written, and the header
    row built from ``columns`` is emitted. Nothing is done on exit.

    :param response: file-like or response-like object for writing
        data and headers (response-like objects only)
    :param columns: list of column names, written as the first row
    :param name: file name (for headers, response-like objects only)
    :param bom: True to include a UTF-8 BOM at the start of the file

    Usage::

        with tsv_writer(response, fields) as d:
            d.writerow(row1)
            d.writerow(row2)
    '''

    if hasattr(response, 'headers'):
        # IANA-registered media type for TSV. The previous value,
        # 'text/csv;tab-separated-values charset=utf-8', was malformed and
        # advertised the payload as CSV.
        response.headers['Content-Type'] = (
            'text/tab-separated-values; charset=utf-8')
        if name:
            response.headers['Content-disposition'] = (
                'attachment; filename="{name}.tsv"'.format(name=name))
    wr = unicodecsv.writer(
        response, encoding='utf-8', dialect=unicodecsv.excel_tab)
    if bom:
        # The BOM has to precede the header row in the output.
        response.write(UTF8_BOM)
    wr.writerow(columns)
    yield wr

0 comments on commit da3b957

Please sign in to comment.