From da3b9571fe0ed4536a9ea34398c15dfc7c4faf3d Mon Sep 17 00:00:00 2001
From: Ian Ward
Date: Fri, 6 Jan 2017 15:35:54 -0500
Subject: [PATCH] [#3390] factor out csv/tsv writing into context managers

---
Review notes (this section is ignored when the patch is applied):
  * tsv_writer now sends "text/tab-separated-values; charset=utf-8", the
    value the removed controller code used, instead of the malformed
    "text/csv;tab-separated-values charset=utf-8".
  * Docstring examples use the "..." continuation prompt and the real
    parameter name (columns).
  * Line counts are unchanged; the writer.py post-image blob id in the
    index line predates these corrections.

 ckanext/datastore/controller.py | 67 +++++++++++++--------------------
 ckanext/datastore/writer.py     | 61 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 40 deletions(-)
 create mode 100644 ckanext/datastore/writer.py

diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py
index 81d3759b025..559a1d2dfb7 100644
--- a/ckanext/datastore/controller.py
+++ b/ckanext/datastore/controller.py
@@ -1,7 +1,6 @@
 # encoding: utf-8
 
 import StringIO
-import unicodecsv as csv
 
 import pylons
 
@@ -16,11 +15,14 @@
     BaseController,
     abort,
 )
+from ckanext.datastore.writer import (
+    csv_writer,
+    tsv_writer,
+)
 
 int_validator = get_validator('int_validator')
 boolean_validator = get_validator('boolean_validator')
 
-UTF8_BOM = u'\uFEFF'.encode('utf-8')
 DUMP_FORMATS = 'csv', 'tsv'
 PAGINATE_BY = 10000
 
@@ -38,36 +40,17 @@ def dump(self, resource_id):
         bom = boolean_validator(request.GET.get('bom'), {})
         fmt = request.GET.get('format', 'csv')
 
-        def start_writer():
+        def start_writer(columns):
             if fmt == 'csv':
-                response.headers['Content-Type'] = 'text/csv; charset=utf-8'
-                response.headers['Content-disposition'] = (
-                    'attachment; filename="{name}.csv"'.format(
-                        name=resource_id))
-                wr = csv.writer(response, encoding='utf-8')
-            elif fmt == 'tsv':
-                response.headers['Content-Type'] = (
-                    'text/tab-separated-values; charset=utf-8')
-                response.headers['Content-disposition'] = (
-                    'attachment; filename="{name}.tsv"'.format(
-                        name=resource_id))
-                wr = csv.writer(
-                    response, encoding='utf-8', dialect=csv.excel_tab)
-            else:
-                abort(400,
-                    _(u'format: must be one of %s') % u', '.join(DUMP_FORMATS))
-
-            if bom:
-                response.write(UTF8_BOM)
-            return wr
-
-        wr = None
-        while True:
-            if limit is not None and limit <= 0:
-                break
+                return csv_writer(response, columns, resource_id, bom)
+            if fmt == 'tsv':
+                return tsv_writer(response, columns, resource_id, bom)
+            abort(400, _(
+                u'format: must be one of %s') % u', '.join(DUMP_FORMATS))
 
+        def result_page(offset, limit):
             try:
-                result = get_action('datastore_search')(None, {
+                return get_action('datastore_search')(None, {
                     'resource_id': resource_id,
                     'limit':
                         PAGINATE_BY if limit is None
@@ -77,17 +60,21 @@ def start_writer():
             except ObjectNotFound:
                 abort(404, _('DataStore resource not found'))
 
-            if not wr:
-                wr = start_writer()
+        result = result_page(offset, limit)
+        columns = [x['id'] for x in result['fields']]
+
+        with start_writer(columns) as wr:
+            while True:
+                if limit is not None and limit <= 0:
+                    break
 
-            header = [x['id'] for x in result['fields']]
-            wr.writerow(header)
+                for record in result['records']:
+                    wr.writerow([record[column] for column in columns])
 
-            for record in result['records']:
-                wr.writerow([record[column] for column in header])
+                if len(result['records']) < PAGINATE_BY:
+                    break
+                offset += PAGINATE_BY
+                if limit is not None:
+                    limit -= PAGINATE_BY
 
-            if len(result['records']) < PAGINATE_BY:
-                break
-            offset += PAGINATE_BY
-            if limit is not None:
-                limit -= PAGINATE_BY
+                result = result_page(offset, limit)
diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py
new file mode 100644
index 00000000000..b767220ad5a
--- /dev/null
+++ b/ckanext/datastore/writer.py
@@ -0,0 +1,61 @@
+from contextlib import contextmanager
+
+import unicodecsv
+
+UTF8_BOM = u'\uFEFF'.encode('utf-8')
+
+
+@contextmanager
+def csv_writer(response, columns, name=None, bom=False):
+    u'''Context manager for writing UTF-8 CSV data to response
+
+    :param response: file-like or response-like object for writing
+        data and headers (response-like objects only)
+    :param columns: list of column names
+    :param name: file name (for headers, response-like objects only)
+    :param bom: True to include a UTF-8 BOM at the start of the file
+
+    >>> with csv_writer(response, columns) as d:
+    ...     d.writerow(row1)
+    ...     d.writerow(row2)
+    '''
+
+    if hasattr(response, 'headers'):
+        response.headers['Content-Type'] = 'text/csv; charset=utf-8'
+        if name:
+            response.headers['Content-disposition'] = (
+                'attachment; filename="{name}.csv"'.format(name=name))
+    wr = unicodecsv.writer(response, encoding='utf-8')
+    if bom:
+        response.write(UTF8_BOM)
+    wr.writerow(columns)
+    yield wr
+
+
+@contextmanager
+def tsv_writer(response, columns, name=None, bom=False):
+    u'''Context manager for writing UTF-8 TSV data to response
+
+    :param response: file-like or response-like object for writing
+        data and headers (response-like objects only)
+    :param columns: list of column names
+    :param name: file name (for headers, response-like objects only)
+    :param bom: True to include a UTF-8 BOM at the start of the file
+
+    >>> with tsv_writer(response, columns) as d:
+    ...     d.writerow(row1)
+    ...     d.writerow(row2)
+    '''
+
+    if hasattr(response, 'headers'):
+        response.headers['Content-Type'] = (
+            'text/tab-separated-values; charset=utf-8')
+        if name:
+            response.headers['Content-disposition'] = (
+                'attachment; filename="{name}.tsv"'.format(name=name))
+    wr = unicodecsv.writer(
+        response, encoding='utf-8', dialect=unicodecsv.excel_tab)
+    if bom:
+        response.write(UTF8_BOM)
+    wr.writerow(columns)
+    yield wr