From 497795cc3770a6ffac5501a90039341033e6a4b0 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 7 Dec 2018 20:59:00 +0000 Subject: [PATCH] datastore dump is NOT limited by rows_max. --- ckanext/datastore/controller.py | 27 ++++++++++---------------- ckanext/datastore/logic/action.py | 24 +++-------------------- ckanext/datastore/logic/schema.py | 4 +++- ckanext/datastore/tests/test_dump.py | 17 +++++----------- ckanext/datastore/tests/test_search.py | 6 ++---- doc/maintaining/configuration.rst | 1 - doc/maintaining/datastore.rst | 7 ------- 7 files changed, 23 insertions(+), 63 deletions(-) diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index e99bf7296c9..a49ad6ea645 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -174,22 +174,18 @@ def result_page(offs, lim): 'include_total': False, }, **search_params)) - # number of records the user can get is bounded by rows_max - rows_max = int(config.get('ckan.datastore.search.rows_max', 32000)) - if limit > rows_max: - limit = rows_max - - def set_header(num_records_written): - records_are_up_to_the_limit = num_records_written >= rows_max - response.headers['X-Records-Up-To-Rows-Max'] = \ - str(records_are_up_to_the_limit).lower() - result = result_page(offset, limit) + if result['limit'] != limit: + # The limit has been reduced to ckan.datastore.search.rows_max + # so let's page over that amount instead + paginate_by = result['limit'] + else: + paginate_by = PAGINATE_BY + with start_writer(result['fields']) as wr: while True: if limit is not None and limit <= 0: - set_header(num_records_written=offset) break records = result['records'] @@ -197,18 +193,15 @@ def set_header(num_records_written): wr.write_records(records) if records_format == 'objects' or records_format == 'lists': - if len(records) < PAGINATE_BY: - set_header(num_records_written=offset + len(records)) + if len(records) < paginate_by: break elif not records: - set_header(num_records_written=offset) break - offset += PAGINATE_BY + offset += paginate_by if limit is not None: - limit -= PAGINATE_BY + limit -= paginate_by if limit <= 0: - set_header(num_records_written=offset + limit) break result = result_page(offset, limit) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 75a2a6e757f..e33ea0c58c4 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -473,8 +473,9 @@ def datastore_search(context, data_dict): :type fields: list of dictionaries :param offset: query offset value :type offset: int - :param limit: query limit value (which is bounded to site configuration - value ``ckan.datastore.search.rows_max`` which has default of 32000) + :param limit: queried limit value (if the requested ``limit`` was above the + ``ckan.datastore.search.rows_max`` value then this response ``limit`` + will be set to the value of ``ckan.datastore.search.rows_max``) :type limit: int :param filters: query filters :type filters: list of dictionaries @@ -484,12 +485,6 @@ def datastore_search(context, data_dict): :type total_was_estimated: bool :param records: list of matching results :type records: depends on records_format value passed - :param records_truncated: indicates whether the number of records returned - was limited by the internal limit, which is 32000 records (or other - value set in the site's configuration - ``ckan.datastore.search.rows_max``). If records are truncated by this, - this key has value True, otherwise the key is not returned at all. - :type records_truncated: bool ''' backend = DatastoreBackend.get_active_backend() @@ -498,13 +493,6 @@ def datastore_search(context, data_dict): if errors: raise p.toolkit.ValidationError(errors) - rows_max = int(config.get('ckan.datastore.search.rows_max', 32000)) - if data_dict['limit'] > rows_max: - limit_reduced_by_rows_max = True - data_dict['limit'] = rows_max - else: - limit_reduced_by_rows_max = False - res_id = data_dict['resource_id'] if data_dict['resource_id'] not in WHITELISTED_RESOURCES: @@ -527,12 +515,6 @@ def datastore_search(context, data_dict): result.pop('id', None) result.pop('connection_url', None) - # NB this is broken as result['records'] is a string - need total - if limit_reduced_by_rows_max and len(result['records']) == rows_max: - result['records'] = result['records'][:-1] - # result['total'] == rows_max - assert result['total'] == rows_max + 1 - data_dict['records_truncated'] = True return result diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index 160ff3f5fab..43d1e1be588 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -169,7 +169,9 @@ def datastore_search_schema(): 'plain': [ignore_missing, boolean_validator], 'filters': [ignore_missing, json_validator], 'language': [ignore_missing, text_type], - 'limit': [default(100), natural_number_validator], + 'limit': [default(100), natural_number_validator, + limit_to_configured_maximum('ckan.datastore.search.rows_max', + 32000)], 'offset': [ignore_missing, int_validator], 'fields': [ignore_missing, list_of_strings_or_string], 'sort': [ignore_missing, list_of_strings_or_string], diff --git a/ckanext/datastore/tests/test_dump.py b/ckanext/datastore/tests/test_dump.py index d1bff296208..197b9af62fb 100644 --- a/ckanext/datastore/tests/test_dump.py +++ b/ckanext/datastore/tests/test_dump.py @@ -444,9 +444,9 @@ def test_dump_with_low_rows_max(self): app = self._get_test_app() response = app.get('/datastore/dump/{0}'.format(str(resource['id']))) assert_equals('_id,book\r\n' - '1,annakarenina\n', + '1,annakarenina\n' + '2,warandpeace\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'true' @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5) def test_dump_pagination(self): @@ -465,7 +465,6 @@ def test_dump_pagination(self): '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n7,6\n8,7\n9,8\n10,9\n' '11,10\n12,11\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'false' @helpers.change_config('ckan.datastore.search.rows_max', '7') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5) @@ -485,7 +484,6 @@ def test_dump_pagination_csv_with_limit(self): '_id,record\r\n' '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'false' @helpers.change_config('ckan.datastore.search.rows_max', '7') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6) @@ -505,7 +503,6 @@ def test_dump_pagination_csv_with_limit_same_as_paginate(self): '_id,record\r\n' '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'false' @helpers.change_config('ckan.datastore.search.rows_max', '6') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5) @@ -522,9 +519,8 @@ def test_dump_pagination_with_rows_max(self): response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id']))) assert_equals( '_id,record\r\n' - '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n', + '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n7,6\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'true' @helpers.change_config('ckan.datastore.search.rows_max', '6') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6) @@ -541,9 +537,8 @@ def test_dump_pagination_with_rows_max_same_as_paginate(self): response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id']))) assert_equals( '_id,record\r\n' - '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n', + '1,0\n2,1\n3,2\n4,3\n5,4\n6,5\n7,6\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'true' @helpers.change_config('ckan.datastore.search.rows_max', '7') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5) @@ -565,7 +560,6 @@ def test_dump_pagination_json_with_limit(self): ' "records": [\n [1,0],\n [2,1],\n [3,2],\n [4,3],\n' ' [5,4],\n [6,5]\n]}\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'false' @helpers.change_config('ckan.datastore.search.rows_max', '6') @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5) @@ -585,6 +579,5 @@ def test_dump_pagination_json_with_rows_max(self): '{\n "fields": [{"type":"int","id":"_id"},' '{"type":"int4","id":"record"}],\n' ' "records": [\n [1,0],\n [2,1],\n [3,2],\n [4,3],\n' - ' [5,4],\n [6,5]\n]}\n', + ' [5,4],\n [6,5],\n [7,6]\n]}\n', response.body) - assert response.headers['X-Records-Up-To-Rows-Max'] == 'true' diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index c0116ed8b7e..b107a05bcd4 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -301,9 +301,7 @@ def test_search_limit(self): result = helpers.call_action('datastore_search', **search_data) assert_equals(result['total'], 2) assert_equals(result['records'], [{u'the year': 2014, u'_id': 1}]) - # it's limited by the user-given limit, rather than the rows_max - # setting, so it is not records_truncated - assert 'records_truncated' not in result + assert_equals(result['limit'], 1) def test_search_limit_invalid(self): resource = factories.Resource() @@ -363,7 +361,7 @@ def test_search_limit_config_default(self): result = helpers.call_action('datastore_search', **search_data) assert_equals(result['total'], 2) assert_equals(result['records'], [{u'the year': 2014, u'_id': 1}]) - assert_equals(result[u'records_truncated'], True) + assert_equals(result['limit'], 1) class TestDatastoreSearchLegacyTests(DatastoreLegacyTestBase): diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 0bdfb4f4de5..767361284c4 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -301,7 +301,6 @@ Maximum allowed value for the number of rows returned by the datastore. Specifically this limits: * ``datastore_search``'s ``limit`` parameter. * ``datastore_search_sql`` queries have this limit inserted. -* Datastore 'dump' at /datastore/dump/{RESOURCE-ID} Site Settings ------------- diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index 102942339c1..f38aad2f882 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -252,13 +252,6 @@ tab-separated file use A number of parameters from :meth:`~ckanext.datastore.logic.action.datastore_search` can be used: ``offset``, ``limit``, ``filters``, ``q``, ``distinct``, ``plain``, ``language``, ``fields``, ``sort`` -The number of records returned is limited by site configuration -``ckan.datastore.search.rows_max``, which has a default of 32000. -Whether or not the number of records returned reaches this limit is indicated -by a HTTP header: - - X-Records-Up-To-Rows-Max: true - .. _CSV: https://en.wikipedia.org/wiki/Comma-separated_values