Skip to content

Commit

Permalink
[#3390] datastore dump format=json
Browse files Browse the repository at this point in the history
  • Loading branch information
wardi committed Jan 6, 2017
1 parent da3b957 commit 0815bee
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 7 deletions.
5 changes: 4 additions & 1 deletion ckanext/datastore/controller.py
Expand Up @@ -18,12 +18,13 @@
from ckanext.datastore.writer import (
csv_writer,
tsv_writer,
json_writer,
)

# Validators looked up by name through get_validator.
int_validator = get_validator('int_validator')
boolean_validator = get_validator('boolean_validator')

# Values accepted for the dump ``format`` option; anything else is rejected
# with a 400 that lists these names.
# NOTE(review): this diff view shows both the previous tuple and its
# replacement -- the later assignment (which adds 'json') is the effective one.
DUMP_FORMATS = 'csv', 'tsv'
DUMP_FORMATS = 'csv', 'tsv', 'json'
# NOTE(review): presumably the number of rows fetched per page while
# dumping -- the code that uses it is not visible here, confirm there.
PAGINATE_BY = 10000


Expand All @@ -45,6 +46,8 @@ def start_writer(columns):
return csv_writer(response, columns, resource_id, bom)
if fmt == 'tsv':
return tsv_writer(response, columns, resource_id, bom)
if fmt == 'json':
return json_writer(response, columns, resource_id, bom)
abort(400, _(
u'format: must be one of %s') % u', '.join(DUMP_FORMATS))

Expand Down
58 changes: 55 additions & 3 deletions ckanext/datastore/writer.py
@@ -1,4 +1,6 @@
from contextlib import contextmanager
from email.utils import encode_rfc2231
import json

import unicodecsv

Expand All @@ -24,7 +26,8 @@ def csv_writer(response, columns, name=None, bom=False):
response.headers['Content-Type'] = 'text/csv; charset=utf-8'
if name:
response.headers['Content-disposition'] = (
'attachment; filename="{name}.csv"'.format(name=name))
'attachment; filename="{name}.csv"'.format(
name=encode_rfc2231(name)))
wr = unicodecsv.writer(response, encoding='utf-8')
if bom:
response.write(UTF8_BOM)
Expand All @@ -49,13 +52,62 @@ def tsv_writer(response, columns, name=None, bom=False):

if hasattr(response, 'headers'):
response.headers['Content-Type'] = (
'text/csv;tab-separated-values charset=utf-8')
'text/tab-separated-values; charset=utf-8')
if name:
response.headers['Content-disposition'] = (
'attachment; filename="{name}.tsv"'.format(name=name))
'attachment; filename="{name}.tsv"'.format(
name=encode_rfc2231(name)))
wr = unicodecsv.writer(
response, encoding='utf-8', dialect=unicodecsv.excel_tab)
if bom:
response.write(UTF8_BOM)
wr.writerow(columns)
yield wr


@contextmanager
def json_writer(response, columns, name=None, bom=False):
    u'''Context manager that streams rows to ``response`` as UTF-8 JSON.

    On entry the opening of a ``{"data": [`` document is written and a
    :class:`JSONWriter` bound to ``response`` is yielded; every
    ``writerow`` call on it appends one object to the array. On a normal
    exit the array and the enclosing object are closed.

    :param response: file-like or response-like object for writing
        data and headers (headers are only set when the object has a
        ``headers`` attribute)
    :param columns: list of column names, used as the keys of each row
    :param name: file name without extension, used for the
        Content-disposition header (response-like objects only)
    :param bom: True to write a UTF-8 BOM before the JSON document

    >>> with json_writer(response, fields) as d:
    >>>     d.writerow(row1)
    >>>     d.writerow(row2)
    '''
    if hasattr(response, 'headers'):
        headers = response.headers
        headers['Content-Type'] = 'application/json; charset=utf-8'
        if name:
            disposition = 'attachment; filename="{name}.json"'
            headers['Content-disposition'] = disposition.format(
                name=encode_rfc2231(name))

    if bom:
        response.write(UTF8_BOM)

    response.write(b'{\n "data": [')
    yield JSONWriter(response, columns)
    # Only reached when the ``with`` body completes without raising; an
    # exception in the body propagates from the yield above and leaves
    # the document unterminated.
    response.write(b'\n]}\n')


class JSONWriter(object):
    u'''Writes rows as comma-separated JSON objects to a response.

    Only the array *elements* are written here; the caller is
    responsible for the surrounding brackets.

    :param response: object with a ``write`` method accepting bytes
    :param columns: list of column names, paired positionally with the
        values of each row
    '''

    def __init__(self, response, columns):
        self.response = response
        self.columns = columns
        # True until the first row has been started; decides whether a
        # separating comma is needed.
        self.first = True

    def writerow(self, row):
        u'''Append one row to the output as a compact JSON object.

        :param row: sequence of values in the same order as ``columns``;
            pairing stops at the shorter of the two sequences
        '''
        out = self.response
        # Every row but the first is preceded by a comma.
        lead = b'\n ' if self.first else b',\n '
        self.first = False
        out.write(lead)

        record = dict(zip(self.columns, row))
        text = json.dumps(
            record,
            ensure_ascii=False,
            separators=(',', ':'),
            sort_keys=True)
        out.write(text.encode('utf-8'))
8 changes: 5 additions & 3 deletions doc/maintaining/datastore.rst
Expand Up @@ -275,14 +275,16 @@ API reference

.. _dump:

Download resource as CSV
------------------------
Download resource
-----------------

A DataStore resource can be downloaded in the `CSV`_ file format from ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}``.

For an Excel-compatible CSV file use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?bom=true``.

For tab-separated values use ``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv``.
Other formats are also supported. For tab-separated values use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv`` and for JSON use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=json``.

.. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values

Expand Down

0 comments on commit 0815bee

Please sign in to comment.