From 417f1ea363baa334b260a84c0ca12b3779f02cf8 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 6 Jan 2017 13:14:14 -0500 Subject: [PATCH] [#3390] datastore: dump TSV with format=tsv --- ckanext/datastore/controller.py | 36 ++++++++++++++++++++++++--------- ckanext/datastore/plugin.py | 2 +- doc/maintaining/datastore.rst | 4 +++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 9e5bdec40f3..81d3759b025 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -21,12 +21,12 @@ boolean_validator = get_validator('boolean_validator') UTF8_BOM = u'\uFEFF'.encode('utf-8') - +DUMP_FORMATS = 'csv', 'tsv' PAGINATE_BY = 10000 class DatastoreController(BaseController): - def dump_csv(self, resource_id): + def dump(self, resource_id): try: offset = int_validator(request.GET.get('offset', 0), {}) except Invalid as e: @@ -36,6 +36,30 @@ def dump_csv(self, resource_id): except Invalid as e: abort(400, u'limit: ' + e.error) bom = boolean_validator(request.GET.get('bom'), {}) + fmt = request.GET.get('format', 'csv') + + def start_writer(): + if fmt == 'csv': + response.headers['Content-Type'] = 'text/csv; charset=utf-8' + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.csv"'.format( + name=resource_id)) + wr = csv.writer(response, encoding='utf-8') + elif fmt == 'tsv': + response.headers['Content-Type'] = ( + 'text/tab-separated-values; charset=utf-8') + response.headers['Content-disposition'] = ( + 'attachment; filename="{name}.tsv"'.format( + name=resource_id)) + wr = csv.writer( + response, encoding='utf-8', dialect=csv.excel_tab) + else: + abort(400, + _(u'format: must be one of %s') % u', '.join(DUMP_FORMATS)) + + if bom: + response.write(UTF8_BOM) + return wr wr = None while True: @@ -54,15 +78,9 @@ def dump_csv(self, resource_id): abort(404, _('DataStore resource not found')) if not wr: - response.headers['Content-Type'] = 'text/csv; charset=utf-8' - response.headers['Content-disposition'] = ( - 'attachment; filename="{name}.csv"'.format( - name=resource_id)) - wr = csv.writer(response, encoding='utf-8') + wr = start_writer() header = [x['id'] for x in result['fields']] - if bom: - response.write(UTF8_BOM) wr.writerow(header) for record in result['records']: diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 4b725fed813..073ffcee214 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -248,7 +248,7 @@ def get_auth_functions(self): def before_map(self, m): m.connect('/datastore/dump/{resource_id}', controller='ckanext.datastore.controller:DatastoreController', - action='dump_csv') + action='dump') return m def before_show(self, resource_dict): diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index 8878d41ef6c..a19294f6c9b 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -280,7 +280,9 @@ Download resource as CSV A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``. -For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}&bom=true`` +For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``. + +For tab-separated values use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv``. .. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values