diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py
index cfcb1a2d5d2..b65b20b1386 100644
--- a/ckan/lib/navl/validators.py
+++ b/ckan/lib/navl/validators.py
@@ -2,7 +2,7 @@
 
 import ckan.lib.navl.dictization_functions as df
 
-from ckan.common import _
+from ckan.common import _, config
 
 missing = df.missing
 StopOnError = df.StopOnError
@@ -123,3 +123,18 @@ def unicode_only(value):
     if not isinstance(value, unicode):
         raise Invalid(_('Must be a Unicode string value'))
     return value
+
+def limit_to_configured_maximum(config_option, default_limit):
+    '''
+    If the value is over a limit, it changes it to the limit. The limit is
+    defined by a configuration option, or if that is not set, a given int
+    default_limit.
+    '''
+    def callable(key, data, errors, context):
+
+        value = data.get(key)
+        limit = int(config.get(config_option, default_limit))
+        if value > limit:
+            data[key] = limit
+
+    return callable
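A quick illustration of the new validator for reviewers — not part of the patch. It assumes navl's flattened-key convention for ``key`` and ``data``, and a config in which ``ckan.datastore.search.rows_max`` is unset, so the ``default_limit`` applies::

    from ckan.lib.navl.validators import limit_to_configured_maximum

    clamp = limit_to_configured_maximum('ckan.datastore.search.rows_max', 32000)

    data = {('limit',): 50000}          # requested value is over the maximum
    clamp(('limit',), data, {('limit',): []}, {})
    assert data[('limit',)] == 32000    # clamped in place, no error raised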
diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py
index c3b64e75423..374092cb7cb 100644
--- a/ckanext/datastore/backend/postgres.py
+++ b/ckanext/datastore/backend/postgres.py
@@ -1342,7 +1342,7 @@ def _execute_single_statement_copy_to(context, sql_string, where_values, buf):
     cursor.close()
 
 
-def format_results(context, results, data_dict):
+def format_results(context, results, data_dict, rows_max):
     result_fields = []
     for field in results.cursor.description:
         result_fields.append({
@@ -1358,6 +1358,8 @@ def format_results(context, results, data_dict):
                 field['type'])
         records.append(converted_row)
     data_dict['records'] = records
+    if data_dict.get('records_truncated', False):
+        data_dict['records'] = data_dict['records'][:rows_max]
     data_dict['fields'] = result_fields
 
     return _unrename_json_field(data_dict)
@@ -1504,6 +1506,11 @@ def search_sql(context, data_dict):
 
     sql = data_dict['sql'].replace('%', '%%')
 
+    # limit the number of results to ckan.datastore.search.rows_max + 1
+    # (the +1 is so that we know if the results went over the limit or not)
+    rows_max = int(config.get('ckan.datastore.search.rows_max', 32000))
+    sql = 'SELECT * FROM ({0}) AS blah LIMIT {1} ;'.format(sql, rows_max + 1)
+
     try:
         context['connection'].execute(
@@ -1520,7 +1527,10 @@ def search_sql(context, data_dict):
 
         results = context['connection'].execute(sql)
 
-        return format_results(context, results, data_dict)
+        if results.rowcount == rows_max + 1:
+            data_dict['records_truncated'] = True
+
+        return format_results(context, results, data_dict, rows_max)
 
     except ProgrammingError, e:
         if e.orig.pgcode == _PG_ERR_CODE['permission_denied']:
@@ -1695,6 +1705,11 @@ def configure(self, config):
         else:
             self._check_urls_and_permissions()
 
+        # check rows_max is valid on CKAN start-up
+        rows_max = config.get('ckan.datastore.search.rows_max')
+        if rows_max is not None:
+            int(rows_max)
+
     def datastore_delete(self, context, data_dict, fields_types, query_dict):
         query_dict['where'] += _where_clauses(data_dict, fields_types)
         return query_dict
@@ -1709,6 +1724,7 @@ def datastore_search(self, context, data_dict, fields_types, query_dict):
         field_ids = fields_types.keys()
         ts_query, rank_column = _textsearch_query(data_dict)
 
+        # add default limit here just in case - already defaulted in the schema
         limit = data_dict.get('limit', 100)
         offset = data_dict.get('offset', 0)
 
diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py
index 902cb37f40e..664de4805b1 100644
--- a/ckanext/datastore/controller.py
+++ b/ckanext/datastore/controller.py
@@ -14,6 +14,7 @@
     render,
     c,
     h,
+    config,
 )
 from ckanext.datastore.writer import (
     csv_writer,
@@ -141,6 +142,15 @@ def result_page(offs, lim):
 
     result = result_page(offset, limit)
 
+    if result['limit'] != limit:
+        # `limit` (from PAGINATE_BY) must have been more than
+        # ckan.datastore.search.rows_max, so datastore_search responded with a
+        # limit matching ckan.datastore.search.rows_max. So we need to paginate
+        # by that amount instead, otherwise we'll have gaps in the records.
+        paginate_by = result['limit']
+    else:
+        paginate_by = PAGINATE_BY
+
     with start_writer(result['fields']) as wr:
         while True:
             if limit is not None and limit <= 0:
@@ -151,14 +161,14 @@ def result_page(offs, lim):
             wr.write_records(records)
 
             if records_format == 'objects' or records_format == 'lists':
-                if len(records) < PAGINATE_BY:
+                if len(records) < paginate_by:
                     break
             elif not records:
                 break
 
-            offset += PAGINATE_BY
+            offset += paginate_by
             if limit is not None:
-                limit -= PAGINATE_BY
+                limit -= paginate_by
                 if limit <= 0:
                     break
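The ``rows_max + 1`` wrapper in ``search_sql`` above is a standard way to detect truncation without a second ``COUNT(*)`` query: ask for one row more than you intend to return, and if it arrives you know the result set was cut short. A standalone sketch of the same idea — ``execute_sql`` is a stand-in for any DB-API style helper, not a function from this patch::

    def run_limited(execute_sql, user_sql, rows_max=32000):
        # wrap the user's query, asking for one sentinel row beyond the cap
        wrapped = 'SELECT * FROM ({0}) AS blah LIMIT {1}'.format(
            user_sql, rows_max + 1)
        rows = execute_sql(wrapped)
        truncated = len(rows) == rows_max + 1
        # drop the sentinel so callers never see more than rows_max rows
        return rows[:rows_max], truncated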
diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py
index 9221f0d751f..2a2e45ab978 100644
--- a/ckanext/datastore/logic/action.py
+++ b/ckanext/datastore/logic/action.py
@@ -392,7 +392,9 @@ def datastore_search(context, data_dict):
     :param language: language of the full text query
                      (optional, default: english)
     :type language: string
-    :param limit: maximum number of rows to return (optional, default: 100)
+    :param limit: maximum number of rows to return
+        (optional, default: ``100``, upper limit: ``32000`` unless set in
+        site's configuration ``ckan.datastore.search.rows_max``)
     :type limit: int
     :param offset: offset this number of rows (optional)
     :type offset: int
@@ -432,7 +434,9 @@
     :type fields: list of dictionaries
     :param offset: query offset value
     :type offset: int
-    :param limit: query limit value
+    :param limit: queried limit value (if the requested ``limit`` was above the
+        ``ckan.datastore.search.rows_max`` value then this response ``limit``
+        will be set to the value of ``ckan.datastore.search.rows_max``)
     :type limit: int
     :param filters: query filters
     :type filters: list of dictionaries
@@ -440,6 +444,12 @@
     :type total: int
     :param records: list of matching results
     :type records: depends on records_format value passed
+    :param records_truncated: indicates whether the number of records returned
+        was limited by the internal limit, which is 32000 records (or other
+        value set in the site's configuration
+        ``ckan.datastore.search.rows_max``). If records are truncated by this,
+        this key has value True, otherwise the key is not returned at all.
+    :type records_truncated: bool
 
     '''
     backend = DatastoreBackend.get_active_backend()
@@ -481,6 +491,8 @@ def datastore_search_sql(context, data_dict):
     engine is the
     `PostgreSQL engine `_.
     There is an enforced timeout on SQL queries to avoid an unintended DOS.
+    The number of results returned is limited to 32000, unless set in the
+    site's configuration ``ckan.datastore.search.rows_max``
 
     DataStore resource that belong to a private CKAN resource cannot be
     searched with this action. Use
     :meth:`~ckanext.datastore.logic.action.datastore_search` instead.
diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py
index 903e016d71c..24e33abd8dd 100644
--- a/ckanext/datastore/logic/schema.py
+++ b/ckanext/datastore/logic/schema.py
@@ -18,6 +18,8 @@
 OneOf = get_validator('OneOf')
 unicode_only = get_validator('unicode_only')
 default = get_validator('default')
+natural_number_validator = get_validator('natural_number_validator')
+limit_to_configured_maximum = get_validator('limit_to_configured_maximum')
 
 
 def rename(old, new):
@@ -157,7 +159,9 @@ def datastore_search_schema():
         'plain': [ignore_missing, boolean_validator],
         'filters': [ignore_missing, json_validator],
         'language': [ignore_missing, unicode],
-        'limit': [ignore_missing, int_validator],
+        'limit': [default(100), natural_number_validator,
+                  limit_to_configured_maximum('ckan.datastore.search.rows_max',
+                                              32000)],
         'offset': [ignore_missing, int_validator],
         'fields': [ignore_missing, list_of_strings_or_string],
         'sort': [ignore_missing, list_of_strings_or_string],
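The combined effect of the schema and docs changes is easiest to see in a concrete call. A hypothetical example against a site running the default configuration — the resource id is made up, and ``toolkit.get_action`` is simply the usual way to invoke an action::

    from ckan.plugins import toolkit

    result = toolkit.get_action('datastore_search')({}, {
        'resource_id': 'my-resource-id',   # illustrative id
        'limit': 50000,                    # above the 32000 default maximum
    })
    assert result['limit'] == 32000        # clamped by the schema validators
    assert len(result['records']) <= 32000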
diff --git a/ckanext/datastore/tests/test_dump.py b/ckanext/datastore/tests/test_dump.py
index 419473aa9b1..9527e10fac3 100644
--- a/ckanext/datastore/tests/test_dump.py
+++ b/ckanext/datastore/tests/test_dump.py
@@ -1,40 +1,39 @@
 # encoding: utf-8
 
+from nose.tools import assert_equals, assert_in
+import mock
 import json
 
-import ckan.config.middleware as middleware
-import ckan.lib.create_test_data as ctd
-import ckan.model as model
-import ckan.plugins as p
-import ckan.tests.legacy as tests
-import ckanext.datastore.backend.postgres as db
-import ckanext.datastore.tests.helpers as helpers
-import nose
-import paste.fixture
-import sqlalchemy.orm as orm
-from ckan.common import config
-from nose.tools import assert_equals, assert_in
+from ckanext.datastore.tests.helpers import DatastoreFunctionalTestBase
+import ckan.tests.helpers as helpers
+import ckan.tests.factories as factories
 
 
-class TestDatastoreDump(object):
-    sysadmin_user = None
-    normal_user = None
-
-    @classmethod
-    def setup_class(cls):
-        wsgiapp = middleware.make_app(config['global_conf'], **config)
-        cls.app = paste.fixture.TestApp(wsgiapp)
-        if not tests.is_datastore_supported():
-            raise nose.SkipTest("Datastore not supported")
-        p.load('datastore')
-        ctd.CreateTestData.create()
-        cls.sysadmin_user = model.User.get('testsysadmin')
-        cls.normal_user = model.User.get('annafan')
-        resource = model.Package.get('annakarenina').resources[0]
-        cls.data = {
-            'resource_id': resource.id,
+class TestDatastoreDump(DatastoreFunctionalTestBase):
+    def test_dump_basic(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {u'book': 'annakarenina'},
+                {u'book': 'warandpeace'},
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+        assert_equals('_id,book\r\n'
+                      '1,annakarenina\n'
+                      '2,warandpeace\n',
+                      response.body)
+
+    def test_all_fields_types(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
             'force': True,
-            'aliases': 'books',
             'fields': [
                 {
                     'id': u'b\xfck',
@@ -83,26 +82,11 @@ def setup_class(cls):
                 }
             ]
         }
-        postparams = '%s=1' % json.dumps(cls.data)
-        auth = {'Authorization': str(cls.sysadmin_user.apikey)}
-        res = cls.app.post('/api/action/datastore_create', params=postparams,
-                           extra_environ=auth)
-        res_dict = json.loads(res.body)
-        assert res_dict['success'] is True
-
-        engine = db.get_write_engine()
-        cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
+        helpers.call_action('datastore_create', **data)
 
-    @classmethod
-    def teardown_class(cls):
-        helpers.rebuild_all_dbs(cls.Session)
-        p.unload('datastore')
-
-    def test_dump_basic(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.get('/datastore/dump/{0}'.format(str(
-            self.data['resource_id'])), extra_environ=auth)
-        content = res.body.decode('utf-8')
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+        content = response.body.decode('utf-8')
         expected = (
             u'_id,b\xfck,author,published'
             u',characters,random_letters,nested')
@@ -110,33 +94,110 @@ def test_dump_basic(self):
         assert_in('warandpeace', content)
         assert_in('"[""Princess Anna"",""Sergius""]"', content)
 
+    def test_alias(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'aliases': 'books',
+            'records': [
+                {u'book': 'annakarenina'},
+                {u'book': 'warandpeace'},
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
         # get with alias instead of id
-        res = self.app.get('/datastore/dump/{0}'.format(str(
-            self.data['aliases'])), extra_environ=auth)
+        response = app.get('/datastore/dump/books')
+        assert_equals('_id,book\r\n'
+                      '1,annakarenina\n'
+                      '2,warandpeace\n',
+                      response.body)
 
     def test_dump_does_not_exist_raises_404(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        self.app.get('/datastore/dump/{0}'.format(str(
-            'does-not-exist')), extra_environ=auth, status=404)
+        app = self._get_test_app()
+        app.get('/datastore/dump/does-not-exist', status=404)
 
     def test_dump_limit(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.get('/datastore/dump/{0}?limit=1'.format(str(
-            self.data['resource_id'])), extra_environ=auth)
-        content = res.body.decode('utf-8')
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {u'book': 'annakarenina'},
+                {u'book': 'warandpeace'},
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=1'.format(str(
+            resource['id'])))
+        content = response.body.decode('utf-8')
         expected_content = (
-            u'_id,b\xfck,author,published,characters,random_letters,'
-            u'nested\r\n1,annakarenina,tolstoy,2005-03-01T00:00:00,'
-            u'"[""Princess Anna"",""Sergius""]",'
-            u'"[""a"",""e"",""x""]","[""b"", '
-            u'{""moo"": ""moo""}]"\n')
+            u'_id,book\r\n'
+            u'1,annakarenina\n')
         assert_equals(content, expected_content)
 
     def test_dump_tsv(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.get('/datastore/dump/{0}?limit=1&format=tsv'.format(str(
-            self.data['resource_id'])), extra_environ=auth)
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'fields': [
+                {
+                    'id': u'b\xfck',
+                    'type': 'text'
+                },
+                {
+                    'id': 'author',
+                    'type': 'text'
+                },
+                {
+                    'id': 'published'
+                },
+                {
+                    'id': u'characters',
+                    u'type': u'_text'
+                },
+                {
+                    'id': 'random_letters',
+                    'type': 'text[]'
+                }
+            ],
+            'records': [
+                {
+                    u'b\xfck': 'annakarenina',
+                    'author': 'tolstoy',
+                    'published': '2005-03-01',
+                    'nested': [
+                        'b',
+                        {'moo': 'moo'}
+                    ],
+                    u'characters': [
+                        u'Princess Anna',
+                        u'Sergius'
+                    ],
+                    'random_letters': [
+                        'a', 'e', 'x'
+                    ]
+                },
+                {
+                    u'b\xfck': 'warandpeace',
+                    'author': 'tolstoy',
+                    'nested': {'a': 'b'},
+                    'random_letters': [
+
+                    ]
+                }
+            ]
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        res = app.get('/datastore/dump/{0}?limit=1&format=tsv'.format(str(
+            resource['id'])))
         content = res.body.decode('utf-8')
 
         expected_content = (
@@ -148,9 +209,63 @@ def test_dump_tsv(self):
         assert_equals(content, expected_content)
 
     def test_dump_json(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.get('/datastore/dump/{0}?limit=1&format=json'.format(
-            str(self.data['resource_id'])), extra_environ=auth)
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'fields': [
+                {
+                    'id': u'b\xfck',
+                    'type': 'text'
+                },
+                {
+                    'id': 'author',
+                    'type': 'text'
+                },
+                {
+                    'id': 'published'
+                },
+                {
+                    'id': u'characters',
+                    u'type': u'_text'
+                },
+                {
+                    'id': 'random_letters',
+                    'type': 'text[]'
+                }
+            ],
+            'records': [
+                {
+                    u'b\xfck': 'annakarenina',
+                    'author': 'tolstoy',
+                    'published': '2005-03-01',
+                    'nested': [
+                        'b',
+                        {'moo': 'moo'}
+                    ],
+                    u'characters': [
+                        u'Princess Anna',
+                        u'Sergius'
+                    ],
+                    'random_letters': [
+                        'a', 'e', 'x'
+                    ]
+                },
+                {
+                    u'b\xfck': 'warandpeace',
+                    'author': 'tolstoy',
+                    'nested': {'a': 'b'},
+                    'random_letters': [
+
+                    ]
+                }
+            ]
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        res = app.get('/datastore/dump/{0}?limit=1&format=json'.format(
+            str(resource['id'])))
         content = res.body.decode('utf-8')
         expected_content = (
             u'{\n "fields": [{"type":"int","id":"_id"},{"type":"text",'
@@ -164,9 +279,63 @@ def test_dump_json(self):
         assert_equals(content, expected_content)
 
     def test_dump_xml(self):
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.get('/datastore/dump/{0}?limit=1&format=xml'.format(str(
-            self.data['resource_id'])), extra_environ=auth)
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'fields': [
+                {
+                    'id': u'b\xfck',
+                    'type': 'text'
+                },
+                {
+                    'id': 'author',
+                    'type': 'text'
+                },
+                {
+                    'id': 'published'
+                },
+                {
+                    'id': u'characters',
+                    u'type': u'_text'
+                },
+                {
+                    'id': 'random_letters',
+                    'type': 'text[]'
+                }
+            ],
+            'records': [
+                {
+                    u'b\xfck': 'annakarenina',
+                    'author': 'tolstoy',
+                    'published': '2005-03-01',
+                    'nested': [
+                        'b',
+                        {'moo': 'moo'}
+                    ],
+                    u'characters': [
+                        u'Princess Anna',
+                        u'Sergius'
+                    ],
+                    'random_letters': [
+                        'a', 'e', 'x'
+                    ]
+                },
+                {
+                    u'b\xfck': 'warandpeace',
+                    'author': 'tolstoy',
+                    'nested': {'a': 'b'},
+                    'random_letters': [
+
+                    ]
+                }
+            ]
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        res = app.get('/datastore/dump/{0}?limit=1&format=xml'.format(str(
+            resource['id'])))
         content = res.body.decode('utf-8')
         expected_content = (
             u'<data>\n'
@@ -193,3 +362,143 @@ def test_dump_xml(self):
             u'</data>\n'
         )
         assert_equals(content, expected_content)
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '3')
+    def test_dump_with_low_rows_max(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(12))
+
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+    def test_dump_pagination(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(12))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '7')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+    def test_dump_pagination_csv_with_limit(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=11'.format(
+            str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(11))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '7')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
+    def test_dump_pagination_csv_with_limit_same_as_paginate(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=6'.format(
+            str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(6))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '6')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+    def test_dump_pagination_with_rows_max(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(7))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '6')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
+    def test_dump_pagination_with_rows_max_same_as_paginate(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id'])))
+        assert_equals(get_csv_record_values(response.body),
+                      range(7))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '7')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+    def test_dump_pagination_json_with_limit(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=6&format=json'.format(
+            str(resource['id'])))
+        assert_equals(get_json_record_values(response.body),
+                      range(6))
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '6')
+    @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+    def test_dump_pagination_json_with_rows_max(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [{u'record': str(num)} for num in range(12)],
+        }
+        helpers.call_action('datastore_create', **data)
+
+        app = self._get_test_app()
+        response = app.get('/datastore/dump/{0}?limit=7&format=json'.format(
+            str(resource['id'])))
+        assert_equals(get_json_record_values(response.body),
+                      range(7))
+
+
+def get_csv_record_values(response_body):
+    return [int(record.split(',')[1])
+            for record in response_body.split()[1:]]
+
+
+def get_json_record_values(response_body):
+    return [record[1]
+            for record in json.loads(response_body)['records']]
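The pagination tests above pin down the controller fix: when ``datastore_search`` answers with a smaller ``limit`` than requested (because ``rows_max`` clamped it), the dump loop must advance by that smaller page size or it would leave gaps in the records. A toy model of the adjusted loop, not code from this patch — ``fetch`` is a hypothetical stand-in for a ``datastore_search`` call::

    def iter_all_records(fetch, paginate_by):
        offset = 0
        result = fetch(offset=offset, limit=paginate_by)
        if result['limit'] != paginate_by:
            # the backend clamped the page size; step by the clamped size
            paginate_by = result['limit']
        while True:
            records = result['records']
            for record in records:
                yield record
            if len(records) < paginate_by:
                break                      # a short page means nothing is left
            offset += paginate_by
            result = fetch(offset=offset, limit=paginate_by)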
diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py
index d7bf64e3729..59c9bf1758c 100644
--- a/ckanext/datastore/tests/test_search.py
+++ b/ckanext/datastore/tests/test_search.py
@@ -9,17 +9,20 @@
 import ckan.plugins as p
 import ckan.lib.create_test_data as ctd
 import ckan.model as model
+import ckan.logic as logic
 import ckan.tests.legacy as tests
-from ckan.common import config
 
 import ckanext.datastore.backend.postgres as db
-from ckanext.datastore.tests.helpers import extract, rebuild_all_dbs
+from ckanext.datastore.tests.helpers import (
+    extract, rebuild_all_dbs,
+    DatastoreFunctionalTestBase)
 
 import ckan.tests.helpers as helpers
 import ckan.tests.factories as factories
 
 assert_equals = nose.tools.assert_equals
 assert_raises = nose.tools.assert_raises
+assert_raises_regexp = nose.tools.assert_raises_regexp
 assert_in = nose.tools.assert_in
@@ -594,15 +597,6 @@ def test_search_full_text_invalid_field_value(self):
         res_dict = json.loads(res.body)
         assert res_dict['success'] is False
 
-    def test_search_table_metadata(self):
-        data = {'resource_id': "_table_metadata"}
-        postparams = '%s=1' % json.dumps(data)
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.post('/api/action/datastore_search', params=postparams,
-                            extra_environ=auth)
-        res_dict = json.loads(res.body)
-        assert res_dict['success'] is True
-
     def test_search_is_unsuccessful_when_called_with_filters_not_as_dict(self):
         data = {
             'resource_id': self.data['resource_id'],
@@ -795,7 +789,6 @@ def setup_class(cls):
         cls.data = {
             'resource_id': resource.id,
             'force': True,
-            'aliases': 'books4',
             'fields': [{'id': u'b\xfck', 'type': 'text'},
                        {'id': 'author', 'type': 'text'},
                        {'id': 'published'}],
@@ -845,56 +838,6 @@ def teardown_class(cls):
         rebuild_all_dbs(cls.Session)
         p.unload('datastore')
 
-    def test_validates_sql_has_a_single_statement(self):
-        sql = 'SELECT * FROM public."{0}"; SELECT * FROM public."{0}";'.format(self.data['resource_id'])
-        assert_raises(p.toolkit.ValidationError,
-                      helpers.call_action, 'datastore_search_sql', sql=sql)
-
-    def test_works_with_semicolons_inside_strings(self):
-        sql = 'SELECT * FROM public."{0}" WHERE "author" = \'foo; bar\''.format(self.data['resource_id'])
-        helpers.call_action('datastore_search_sql', sql=sql)
-
-    def test_invalid_statement(self):
-        query = 'SELECT ** FROM foobar'
-        data = {'sql': query}
-        postparams = json.dumps(data)
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.post('/api/action/datastore_search_sql', params=postparams,
-                            extra_environ=auth, status=409)
-        res_dict = json.loads(res.body)
-        assert res_dict['success'] is False
-
-    def test_select_basic(self):
-        query = 'SELECT * FROM "{0}"'.format(self.data['resource_id'])
-        data = {'sql': query}
-        postparams = json.dumps(data)
-        auth = {'Authorization': str(self.normal_user.apikey)}
-        res = self.app.post('/api/action/datastore_search_sql', params=postparams,
-                            extra_environ=auth)
-        res_dict = json.loads(res.body)
-        assert res_dict['success'] is True
-        result = res_dict['result']
-        assert len(result['records']) == len(self.expected_records)
-        for (row_index, row) in enumerate(result['records']):
-            expected_row = self.expected_records[row_index]
-            assert set(row.keys()) == set(expected_row.keys())
-            for field in row:
-                if field == '_full_text':
-                    for ft_value in expected_row['_full_text']:
-                        assert ft_value in row['_full_text']
-                else:
-                    assert row[field] == expected_row[field]
-
-        # test alias search
-        query = 'SELECT * FROM "{0}"'.format(self.data['aliases'])
-        data = {'sql': query}
-        postparams = json.dumps(data)
-        res = self.app.post('/api/action/datastore_search_sql', params=postparams,
-                            extra_environ=auth)
-        res_dict_alias = json.loads(res.body)
-
-        assert result['records'] == res_dict_alias['result']['records']
-
     def test_select_where_like_with_percent(self):
         query = 'SELECT * FROM public."{0}" WHERE "author" LIKE \'tol%\''.format(self.data['resource_id'])
         data = {'sql': query}
@@ -1095,3 +1038,133 @@ def test_not_authorized_to_access_system_tables(self):
         res_dict = json.loads(res.body)
         assert res_dict['success'] is False
         assert res_dict['error']['__type'] == 'Authorization Error'
+
+
+class TestDatastoreSQLFunctional(DatastoreFunctionalTestBase):
+    def test_validates_sql_has_a_single_statement(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {'the year': 2014},
+                {'the year': 2013},
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+        sql = 'SELECT * FROM public."{0}"; SELECT * FROM public."{0}";' \
+            .format(resource['id'])
+        with assert_raises_regexp(p.toolkit.ValidationError,
+                                  'Query is not a single statement'):
+            helpers.call_action('datastore_search_sql', sql=sql)
+
+    def test_works_with_semicolons_inside_strings(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {'author': 'bob'},
+                {'author': 'jane'},
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+        sql = 'SELECT * FROM public."{0}" WHERE "author" = \'foo; bar\'' \
+            .format(resource['id'])
+        helpers.call_action('datastore_search_sql', sql=sql)
+
+    def test_invalid_statement(self):
+        sql = 'SELECT ** FROM foobar'
+        with assert_raises_regexp(
+                logic.ValidationError, 'syntax error at or near "FROM"'):
+            helpers.call_action('datastore_search_sql', sql=sql)
+
+    def test_select_basic(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {u'b\xfck': 'annakarenina',
+                 'author': 'tolstoy',
+                 'published': '2005-03-01',
+                 'nested': ['b', {'moo': 'moo'}]},
+                {u'b\xfck': 'warandpeace',
+                 'author': 'tolstoy',
+                 'nested': {'a': 'b'}}
+            ],
+        }
+        expected_records = [{u'_full_text': [u"'annakarenina'", u"'b'",
+                                             u"'moo'", u"'tolstoy'",
+                                             u"'2005'"],
+                             u'_id': 1,
+                             u'author': u'tolstoy',
+                             u'b\xfck': u'annakarenina',
+                             u'nested': [u'b', {u'moo': u'moo'}],
+                             u'published': u'2005-03-01T00:00:00'},
+                            {u'_full_text': [u"'tolstoy'", u"'warandpeac'",
+                                             u"'b'"],
+                             u'_id': 2,
+                             u'author': u'tolstoy',
+                             u'b\xfck': u'warandpeace',
+                             u'nested': {u'a': u'b'},
+                             u'published': None}]
+        helpers.call_action('datastore_create', **data)
+        sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+        result = helpers.call_action('datastore_search_sql', sql=sql)
+        assert_equals(len(result['records']), 2)
+        for (row_index, row) in enumerate(result['records']):
+            expected_row = expected_records[row_index]
+            assert set(row.keys()) == set(expected_row.keys())
+            for field in row:
+                if field == '_full_text':
+                    for ft_value in expected_row['_full_text']:
+                        assert ft_value in row['_full_text']
+                else:
+                    assert_equals(row[field], expected_row[field])
+        assert u'records_truncated' not in result
+
+    def test_alias_search(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'aliases': 'books4',
+            'records': [
+                {u'b\xfck': 'annakarenina',
+                 'author': 'tolstoy',
+                 'published': '2005-03-01',
+                 'nested': ['b', {'moo': 'moo'}]},
+                {u'b\xfck': 'warandpeace',
+                 'author': 'tolstoy',
+                 'nested': {'a': 'b'}}
+            ],
+        }
+        helpers.call_action('datastore_create', **data)
+        sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+        result = helpers.call_action('datastore_search_sql', sql=sql)
+        sql = 'SELECT * FROM "books4"'
+        result_with_alias = helpers.call_action('datastore_search_sql',
+                                                sql=sql)
+        assert result['records'] == result_with_alias['records']
+
+    @helpers.change_config('ckan.datastore.search.rows_max', '2')
+    def test_search_limit(self):
+        resource = factories.Resource()
+        data = {
+            'resource_id': resource['id'],
+            'force': True,
+            'records': [
+                {'the year': 2014},
+                {'the year': 2013},
+                {'the year': 2015},
+                {'the year': 2016},
+            ],
+        }
+        result = helpers.call_action('datastore_create', **data)
+        sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+        result = helpers.call_action('datastore_search_sql', sql=sql)
+        assert_equals(len(result['records']), 2)
+        assert_equals([res[u'the year'] for res in result['records']],
+                      [2014, 2013])
+        assert_equals(result[u'records_truncated'], True)
diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst
index 9ff1b764080..9305d5943e4 100644
--- a/doc/maintaining/configuration.rst
+++ b/doc/maintaining/configuration.rst
@@ -270,6 +270,24 @@ Default value: ``True``
 This option allows you to disable the datastore_search_sql action function, and
 corresponding API endpoint if you do not wish it to be activated.
 
+.. _ckan.datastore.search.rows_max:
+
+ckan.datastore.search.rows_max
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Example::
+
+  ckan.datastore.search.rows_max = 1000000
+
+Default value: ``32000``
+
+Maximum allowed value for the number of rows returned by the datastore.
+
+Specifically this limits:
+
+* ``datastore_search``'s ``limit`` parameter.
+* ``datastore_search_sql`` queries have this limit inserted.
+
 Site Settings
 -------------
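For API consumers, the new ``records_truncated`` key is straightforward to act on. A hypothetical client-side check — the URL and resource id are illustrative, and this assumes the ``requests`` library and a site with ``datastore_search_sql`` enabled::

    import requests

    response = requests.post(
        'https://example.com/api/3/action/datastore_search_sql',
        json={'sql': 'SELECT * FROM "my-resource-id"'})
    result = response.json()['result']
    if result.get('records_truncated'):
        # only the first rows_max rows came back; page through the rest with
        # datastore_search, or raise rows_max in the site configuration
        print('results truncated at %d rows' % len(result['records']))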