Skip to content

Commit

Permalink
X-Records-Up-To-Rows-Max implemented for Datastore dump call. Pretty …
Browse files Browse the repository at this point in the history
…complicated - not sure if it is worth it.
  • Loading branch information
David Read committed Dec 7, 2018
1 parent 54b56ba commit fc0b465
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 8 deletions.
19 changes: 12 additions & 7 deletions ckanext/datastore/controller.py
Expand Up @@ -175,35 +175,40 @@ def result_page(offs, lim):
}, **search_params))

# number of records the user can get is bounded by rows_max
limit = min(limit,
int(config.get('ckan.datastore.search.rows_max', 32000)))
rows_max = int(config.get('ckan.datastore.search.rows_max', 32000))
if limit > rows_max:
limit = rows_max

def set_header(num_records_written):
    """Set the ``X-Records-Up-To-Rows-Max`` response header.

    The header is ``'true'`` when ``num_records_written`` is greater than
    or equal to ``rows_max`` (taken from the enclosing scope), and
    ``'false'`` otherwise.
    """
    reached_rows_max = num_records_written >= rows_max
    header_value = 'true' if reached_rows_max else 'false'
    response.headers['X-Records-Up-To-Rows-Max'] = header_value

result = result_page(offset, limit)

with start_writer(result['fields']) as wr:
while True:
if limit is not None and limit <= 0:
set_header(num_records_written=offset)
break

records = result['records']

wr.write_records(records)

# NB broken because 'records_truncated' is not returned by datastore_search
if result.get('records_truncated', False):
response.headers['X-Records-Truncated'] = 'true'
break

if records_format == 'objects' or records_format == 'lists':
if len(records) < PAGINATE_BY:
set_header(num_records_written=offset + len(records))
break
elif not records:
set_header(num_records_written=offset)
break

offset += PAGINATE_BY
if limit is not None:
limit -= PAGINATE_BY
if limit <= 0:
set_header(num_records_written=offset + limit)
break

result = result_page(offset, limit)
87 changes: 86 additions & 1 deletion ckanext/datastore/tests/test_dump.py
Expand Up @@ -446,7 +446,7 @@ def test_dump_with_low_rows_max(self):
assert_equals('_id,book\r\n'
'1,annakarenina\n',
response.body)
assert response.headers['X-Records-Truncated'] == 'true'
assert response.headers['X-Records-Up-To-Rows-Max'] == 'true'

@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
def test_dump_pagination(self):
Expand All @@ -465,7 +465,9 @@ def test_dump_pagination(self):
'1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n7,6\n8,7\n9,8\n10,9\n'
'11,10\n12,11\n',
response.body)
assert response.headers['X-Records-Up-To-Rows-Max'] == 'false'

@helpers.change_config('ckan.datastore.search.rows_max', '7')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
def test_dump_pagination_csv_with_limit(self):
resource = factories.Resource()
Expand All @@ -483,7 +485,67 @@ def test_dump_pagination_csv_with_limit(self):
'_id,record\r\n'
'1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n',
response.body)
assert response.headers['X-Records-Up-To-Rows-Max'] == 'false'

@helpers.change_config('ckan.datastore.search.rows_max', '7')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
def test_dump_pagination_csv_with_limit_same_as_paginate(self):
    # The requested limit (6) equals PAGINATE_BY (6) and is below
    # rows_max (7): 6 of the 12 stored records are expected in the CSV
    # and the rows_max header must read 'false'.
    resource = factories.Resource()
    records = [{u'record': str(num)} for num in range(12)]
    helpers.call_action(
        'datastore_create',
        resource_id=resource['id'],
        force=True,
        records=records,
    )

    dump_url = '/datastore/dump/{0}?limit=6'.format(str(resource['id']))
    response = self._get_test_app().get(dump_url)

    expected_body = (
        '_id,record\r\n'
        '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n'
    )
    assert_equals(expected_body, response.body)
    assert response.headers['X-Records-Up-To-Rows-Max'] == 'false'

@helpers.change_config('ckan.datastore.search.rows_max', '6')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
def test_dump_pagination_with_rows_max(self):
    # The requested limit (7) exceeds rows_max (6), and rows_max is not a
    # multiple of PAGINATE_BY (5): 6 of the 12 stored records are expected
    # in the CSV and the rows_max header must read 'true'.
    resource = factories.Resource()
    records = [{u'record': str(num)} for num in range(12)]
    helpers.call_action(
        'datastore_create',
        resource_id=resource['id'],
        force=True,
        records=records,
    )

    dump_url = '/datastore/dump/{0}?limit=7'.format(str(resource['id']))
    response = self._get_test_app().get(dump_url)

    expected_body = (
        '_id,record\r\n'
        '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n'
    )
    assert_equals(expected_body, response.body)
    assert response.headers['X-Records-Up-To-Rows-Max'] == 'true'

@helpers.change_config('ckan.datastore.search.rows_max', '6')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
def test_dump_pagination_with_rows_max_same_as_paginate(self):
    # The requested limit (7) exceeds rows_max (6), and rows_max equals
    # PAGINATE_BY (6): 6 of the 12 stored records are expected in the CSV
    # and the rows_max header must read 'true'.
    resource = factories.Resource()
    records = [{u'record': str(num)} for num in range(12)]
    helpers.call_action(
        'datastore_create',
        resource_id=resource['id'],
        force=True,
        records=records,
    )

    dump_url = '/datastore/dump/{0}?limit=7'.format(str(resource['id']))
    response = self._get_test_app().get(dump_url)

    expected_body = (
        '_id,record\r\n'
        '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n'
    )
    assert_equals(expected_body, response.body)
    assert response.headers['X-Records-Up-To-Rows-Max'] == 'true'

@helpers.change_config('ckan.datastore.search.rows_max', '7')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
def test_dump_pagination_json_with_limit(self):
resource = factories.Resource()
Expand All @@ -503,3 +565,26 @@ def test_dump_pagination_json_with_limit(self):
' "records": [\n [1,0],\n [2,1],\n [3,2],\n [4,3],\n'
' [5,4],\n [6,5]\n]}\n',
response.body)
assert response.headers['X-Records-Up-To-Rows-Max'] == 'false'

@helpers.change_config('ckan.datastore.search.rows_max', '6')
@mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
def test_dump_pagination_json_with_rows_max(self):
    # JSON variant: the requested limit (7) exceeds rows_max (6) with
    # PAGINATE_BY at 5. 6 of the 12 stored records are expected in the
    # JSON output and the rows_max header must read 'true'.
    resource = factories.Resource()
    records = [{u'record': str(num)} for num in range(12)]
    helpers.call_action(
        'datastore_create',
        resource_id=resource['id'],
        force=True,
        records=records,
    )

    dump_url = '/datastore/dump/{0}?limit=7&format=json'.format(
        str(resource['id']))
    response = self._get_test_app().get(dump_url)

    expected_body = (
        '{\n "fields": [{"type":"int","id":"_id"},'
        '{"type":"int4","id":"record"}],\n'
        ' "records": [\n [1,0],\n [2,1],\n [3,2],\n [4,3],\n'
        ' [5,4],\n [6,5]\n]}\n'
    )
    assert_equals(expected_body, response.body)
    assert response.headers['X-Records-Up-To-Rows-Max'] == 'true'
1 change: 1 addition & 0 deletions doc/maintaining/configuration.rst
Expand Up @@ -301,6 +301,7 @@ Maximum allowed value for the number of rows returned by the datastore.
Specifically this limits:
* ``datastore_search``'s ``limit`` parameter.
* ``datastore_search_sql`` queries have this limit inserted.
* The datastore 'dump' endpoint at ``/datastore/dump/{RESOURCE-ID}``.

Site Settings
-------------
Expand Down
10 changes: 10 additions & 0 deletions doc/maintaining/datastore.rst
Expand Up @@ -249,6 +249,16 @@ JSON (``?format=json``) and XML (``?format=xml``). E.g. to download an Excel-com
tab-separated file use
``{CKAN-URL}/datastore/dump/{RESOURCE-ID}?format=tsv&bom=true``.

A number of parameters from :meth:`~ckanext.datastore.logic.action.datastore_search` can be used:
``offset``, ``limit``, ``filters``, ``q``, ``distinct``, ``plain``, ``language``, ``fields``, ``sort``

The number of records returned is limited by site configuration
``ckan.datastore.search.rows_max``, which has a default of 32000.
Whether or not the number of records returned reaches this limit is indicated
by an HTTP header:

X-Records-Up-To-Rows-Max: true

.. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values


Expand Down

0 comments on commit fc0b465

Please sign in to comment.