diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py
index cfcb1a2d5d2..b65b20b1386 100644
--- a/ckan/lib/navl/validators.py
+++ b/ckan/lib/navl/validators.py
@@ -2,7 +2,7 @@
import ckan.lib.navl.dictization_functions as df
-from ckan.common import _
+from ckan.common import _, config
missing = df.missing
StopOnError = df.StopOnError
@@ -123,3 +123,18 @@ def unicode_only(value):
if not isinstance(value, unicode):
raise Invalid(_('Must be a Unicode string value'))
return value
+
+def limit_to_configured_maximum(config_option, default_limit):
+ '''
+    If the value is over the limit, it is changed to the limit. The limit is
+    taken from the given configuration option or, if that option is not set,
+    from the given int default_limit.
+ '''
+ def callable(key, data, errors, context):
+
+ value = data.get(key)
+ limit = int(config.get(config_option, default_limit))
+ if value > limit:
+ data[key] = limit
+
+ return callable
diff --git a/ckanext/datastore/backend/postgres.py b/ckanext/datastore/backend/postgres.py
index c3b64e75423..374092cb7cb 100644
--- a/ckanext/datastore/backend/postgres.py
+++ b/ckanext/datastore/backend/postgres.py
@@ -1342,7 +1342,7 @@ def _execute_single_statement_copy_to(context, sql_string, where_values, buf):
cursor.close()
-def format_results(context, results, data_dict):
+def format_results(context, results, data_dict, rows_max):
result_fields = []
for field in results.cursor.description:
result_fields.append({
@@ -1358,6 +1358,8 @@ def format_results(context, results, data_dict):
field['type'])
records.append(converted_row)
data_dict['records'] = records
+ if data_dict.get('records_truncated', False):
+ data_dict['records'] = data_dict['records'][:rows_max]
data_dict['fields'] = result_fields
return _unrename_json_field(data_dict)
@@ -1504,6 +1506,11 @@ def search_sql(context, data_dict):
sql = data_dict['sql'].replace('%', '%%')
+ # limit the number of results to ckan.datastore.search.rows_max + 1
+ # (the +1 is so that we know if the results went over the limit or not)
+ rows_max = int(config.get('ckan.datastore.search.rows_max', 32000))
+ sql = 'SELECT * FROM ({0}) AS blah LIMIT {1} ;'.format(sql, rows_max + 1)
+
try:
context['connection'].execute(
@@ -1520,7 +1527,10 @@ def search_sql(context, data_dict):
results = context['connection'].execute(sql)
- return format_results(context, results, data_dict)
+ if results.rowcount == rows_max + 1:
+ data_dict['records_truncated'] = True
+
+ return format_results(context, results, data_dict, rows_max)
except ProgrammingError, e:
if e.orig.pgcode == _PG_ERR_CODE['permission_denied']:
@@ -1695,6 +1705,11 @@ def configure(self, config):
else:
self._check_urls_and_permissions()
+ # check rows_max is valid on CKAN start-up
+ rows_max = config.get('ckan.datastore.search.rows_max')
+ if rows_max is not None:
+ int(rows_max)
+
def datastore_delete(self, context, data_dict, fields_types, query_dict):
query_dict['where'] += _where_clauses(data_dict, fields_types)
return query_dict
@@ -1709,6 +1724,7 @@ def datastore_search(self, context, data_dict, fields_types, query_dict):
field_ids = fields_types.keys()
ts_query, rank_column = _textsearch_query(data_dict)
+ # add default limit here just in case - already defaulted in the schema
limit = data_dict.get('limit', 100)
offset = data_dict.get('offset', 0)
diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py
index 902cb37f40e..664de4805b1 100644
--- a/ckanext/datastore/controller.py
+++ b/ckanext/datastore/controller.py
@@ -14,6 +14,7 @@
render,
c,
h,
+ config,
)
from ckanext.datastore.writer import (
csv_writer,
@@ -141,6 +142,15 @@ def result_page(offs, lim):
result = result_page(offset, limit)
+ if result['limit'] != limit:
+ # `limit` (from PAGINATE_BY) must have been more than
+ # ckan.datastore.search.rows_max, so datastore_search responded with a
+ # limit matching ckan.datastore.search.rows_max. So we need to paginate
+ # by that amount instead, otherwise we'll have gaps in the records.
+ paginate_by = result['limit']
+ else:
+ paginate_by = PAGINATE_BY
+
with start_writer(result['fields']) as wr:
while True:
if limit is not None and limit <= 0:
@@ -151,14 +161,14 @@ def result_page(offs, lim):
wr.write_records(records)
if records_format == 'objects' or records_format == 'lists':
- if len(records) < PAGINATE_BY:
+ if len(records) < paginate_by:
break
elif not records:
break
- offset += PAGINATE_BY
+ offset += paginate_by
if limit is not None:
- limit -= PAGINATE_BY
+ limit -= paginate_by
if limit <= 0:
break
diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py
index 9221f0d751f..2a2e45ab978 100644
--- a/ckanext/datastore/logic/action.py
+++ b/ckanext/datastore/logic/action.py
@@ -392,7 +392,9 @@ def datastore_search(context, data_dict):
:param language: language of the full text query
(optional, default: english)
:type language: string
- :param limit: maximum number of rows to return (optional, default: 100)
+ :param limit: maximum number of rows to return
+ (optional, default: ``100``, upper limit: ``32000`` unless set in
+ site's configuration ``ckan.datastore.search.rows_max``)
:type limit: int
:param offset: offset this number of rows (optional)
:type offset: int
@@ -432,7 +434,9 @@ def datastore_search(context, data_dict):
:type fields: list of dictionaries
:param offset: query offset value
:type offset: int
- :param limit: query limit value
+ :param limit: queried limit value (if the requested ``limit`` was above the
+ ``ckan.datastore.search.rows_max`` value then this response ``limit``
+ will be set to the value of ``ckan.datastore.search.rows_max``)
:type limit: int
:param filters: query filters
:type filters: list of dictionaries
@@ -440,6 +444,12 @@ def datastore_search(context, data_dict):
:type total: int
:param records: list of matching results
:type records: depends on records_format value passed
+ :param records_truncated: indicates whether the number of records returned
+        was limited by the internal limit, which is 32000 records (or another
+        value set in the site's configuration
+        ``ckan.datastore.search.rows_max``). If records were truncated by this
+        limit, this key has value ``True``; otherwise the key is not returned.
+ :type records_truncated: bool
'''
backend = DatastoreBackend.get_active_backend()
@@ -481,6 +491,8 @@ def datastore_search_sql(context, data_dict):
engine is the
`PostgreSQL engine `_.
There is an enforced timeout on SQL queries to avoid an unintended DOS.
+ The number of results returned is limited to 32000, unless set in the
+    site's configuration ``ckan.datastore.search.rows_max``.
DataStore resource that belong to a private CKAN resource cannot be
searched with this action. Use
:meth:`~ckanext.datastore.logic.action.datastore_search` instead.
diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py
index 903e016d71c..24e33abd8dd 100644
--- a/ckanext/datastore/logic/schema.py
+++ b/ckanext/datastore/logic/schema.py
@@ -18,6 +18,8 @@
OneOf = get_validator('OneOf')
unicode_only = get_validator('unicode_only')
default = get_validator('default')
+natural_number_validator = get_validator('natural_number_validator')
+limit_to_configured_maximum = get_validator('limit_to_configured_maximum')
def rename(old, new):
@@ -157,7 +159,9 @@ def datastore_search_schema():
'plain': [ignore_missing, boolean_validator],
'filters': [ignore_missing, json_validator],
'language': [ignore_missing, unicode],
- 'limit': [ignore_missing, int_validator],
+ 'limit': [default(100), natural_number_validator,
+ limit_to_configured_maximum('ckan.datastore.search.rows_max',
+ 32000)],
'offset': [ignore_missing, int_validator],
'fields': [ignore_missing, list_of_strings_or_string],
'sort': [ignore_missing, list_of_strings_or_string],
diff --git a/ckanext/datastore/tests/test_dump.py b/ckanext/datastore/tests/test_dump.py
index 419473aa9b1..9527e10fac3 100644
--- a/ckanext/datastore/tests/test_dump.py
+++ b/ckanext/datastore/tests/test_dump.py
@@ -1,40 +1,39 @@
# encoding: utf-8
+from nose.tools import assert_equals, assert_in
+import mock
import json
-import ckan.config.middleware as middleware
-import ckan.lib.create_test_data as ctd
-import ckan.model as model
-import ckan.plugins as p
-import ckan.tests.legacy as tests
-import ckanext.datastore.backend.postgres as db
-import ckanext.datastore.tests.helpers as helpers
-import nose
-import paste.fixture
-import sqlalchemy.orm as orm
-from ckan.common import config
-from nose.tools import assert_equals, assert_in
+from ckanext.datastore.tests.helpers import DatastoreFunctionalTestBase
+import ckan.tests.helpers as helpers
+import ckan.tests.factories as factories
-class TestDatastoreDump(object):
- sysadmin_user = None
- normal_user = None
-
- @classmethod
- def setup_class(cls):
- wsgiapp = middleware.make_app(config['global_conf'], **config)
- cls.app = paste.fixture.TestApp(wsgiapp)
- if not tests.is_datastore_supported():
- raise nose.SkipTest("Datastore not supported")
- p.load('datastore')
- ctd.CreateTestData.create()
- cls.sysadmin_user = model.User.get('testsysadmin')
- cls.normal_user = model.User.get('annafan')
- resource = model.Package.get('annakarenina').resources[0]
- cls.data = {
- 'resource_id': resource.id,
+class TestDatastoreDump(DatastoreFunctionalTestBase):
+ def test_dump_basic(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {u'book': 'annakarenina'},
+ {u'book': 'warandpeace'},
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+ assert_equals('_id,book\r\n'
+ '1,annakarenina\n'
+ '2,warandpeace\n',
+ response.body)
+
+ def test_all_fields_types(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
'force': True,
- 'aliases': 'books',
'fields': [
{
'id': u'b\xfck',
@@ -83,26 +82,11 @@ def setup_class(cls):
}
]
}
- postparams = '%s=1' % json.dumps(cls.data)
- auth = {'Authorization': str(cls.sysadmin_user.apikey)}
- res = cls.app.post('/api/action/datastore_create', params=postparams,
- extra_environ=auth)
- res_dict = json.loads(res.body)
- assert res_dict['success'] is True
-
- engine = db.get_write_engine()
- cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine))
+ helpers.call_action('datastore_create', **data)
- @classmethod
- def teardown_class(cls):
- helpers.rebuild_all_dbs(cls.Session)
- p.unload('datastore')
-
- def test_dump_basic(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.get('/datastore/dump/{0}'.format(str(
- self.data['resource_id'])), extra_environ=auth)
- content = res.body.decode('utf-8')
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+ content = response.body.decode('utf-8')
expected = (
u'_id,b\xfck,author,published'
u',characters,random_letters,nested')
@@ -110,33 +94,110 @@ def test_dump_basic(self):
assert_in('warandpeace', content)
assert_in('"[""Princess Anna"",""Sergius""]"', content)
+ def test_alias(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'aliases': 'books',
+ 'records': [
+ {u'book': 'annakarenina'},
+ {u'book': 'warandpeace'},
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
# get with alias instead of id
- res = self.app.get('/datastore/dump/{0}'.format(str(
- self.data['aliases'])), extra_environ=auth)
+ response = app.get('/datastore/dump/books')
+ assert_equals('_id,book\r\n'
+ '1,annakarenina\n'
+ '2,warandpeace\n',
+ response.body)
def test_dump_does_not_exist_raises_404(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- self.app.get('/datastore/dump/{0}'.format(str(
- 'does-not-exist')), extra_environ=auth, status=404)
+ app = self._get_test_app()
+ app.get('/datastore/dump/does-not-exist', status=404)
def test_dump_limit(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.get('/datastore/dump/{0}?limit=1'.format(str(
- self.data['resource_id'])), extra_environ=auth)
- content = res.body.decode('utf-8')
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {u'book': 'annakarenina'},
+ {u'book': 'warandpeace'},
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=1'.format(str(
+ resource['id'])))
+ content = response.body.decode('utf-8')
expected_content = (
- u'_id,b\xfck,author,published,characters,random_letters,'
- u'nested\r\n1,annakarenina,tolstoy,2005-03-01T00:00:00,'
- u'"[""Princess Anna"",""Sergius""]",'
- u'"[""a"",""e"",""x""]","[""b"", '
- u'{""moo"": ""moo""}]"\n')
+ u'_id,book\r\n'
+ u'1,annakarenina\n')
assert_equals(content, expected_content)
def test_dump_tsv(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.get('/datastore/dump/{0}?limit=1&format=tsv'.format(str(
- self.data['resource_id'])), extra_environ=auth)
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'fields': [
+ {
+ 'id': u'b\xfck',
+ 'type': 'text'
+ },
+ {
+ 'id': 'author',
+ 'type': 'text'
+ },
+ {
+ 'id': 'published'
+ },
+ {
+ 'id': u'characters',
+ u'type': u'_text'
+ },
+ {
+ 'id': 'random_letters',
+ 'type': 'text[]'
+ }
+ ],
+ 'records': [
+ {
+ u'b\xfck': 'annakarenina',
+ 'author': 'tolstoy',
+ 'published': '2005-03-01',
+ 'nested': [
+ 'b',
+ {'moo': 'moo'}
+ ],
+ u'characters': [
+ u'Princess Anna',
+ u'Sergius'
+ ],
+ 'random_letters': [
+ 'a', 'e', 'x'
+ ]
+ },
+ {
+ u'b\xfck': 'warandpeace',
+ 'author': 'tolstoy',
+ 'nested': {'a': 'b'},
+ 'random_letters': [
+
+ ]
+ }
+ ]
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ res = app.get('/datastore/dump/{0}?limit=1&format=tsv'.format(str(
+ resource['id'])))
content = res.body.decode('utf-8')
expected_content = (
@@ -148,9 +209,63 @@ def test_dump_tsv(self):
assert_equals(content, expected_content)
def test_dump_json(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.get('/datastore/dump/{0}?limit=1&format=json'.format(
- str(self.data['resource_id'])), extra_environ=auth)
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'fields': [
+ {
+ 'id': u'b\xfck',
+ 'type': 'text'
+ },
+ {
+ 'id': 'author',
+ 'type': 'text'
+ },
+ {
+ 'id': 'published'
+ },
+ {
+ 'id': u'characters',
+ u'type': u'_text'
+ },
+ {
+ 'id': 'random_letters',
+ 'type': 'text[]'
+ }
+ ],
+ 'records': [
+ {
+ u'b\xfck': 'annakarenina',
+ 'author': 'tolstoy',
+ 'published': '2005-03-01',
+ 'nested': [
+ 'b',
+ {'moo': 'moo'}
+ ],
+ u'characters': [
+ u'Princess Anna',
+ u'Sergius'
+ ],
+ 'random_letters': [
+ 'a', 'e', 'x'
+ ]
+ },
+ {
+ u'b\xfck': 'warandpeace',
+ 'author': 'tolstoy',
+ 'nested': {'a': 'b'},
+ 'random_letters': [
+
+ ]
+ }
+ ]
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ res = app.get('/datastore/dump/{0}?limit=1&format=json'.format(
+ str(resource['id'])))
content = res.body.decode('utf-8')
expected_content = (
u'{\n "fields": [{"type":"int","id":"_id"},{"type":"text",'
@@ -164,9 +279,63 @@ def test_dump_json(self):
assert_equals(content, expected_content)
def test_dump_xml(self):
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.get('/datastore/dump/{0}?limit=1&format=xml'.format(str(
- self.data['resource_id'])), extra_environ=auth)
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'fields': [
+ {
+ 'id': u'b\xfck',
+ 'type': 'text'
+ },
+ {
+ 'id': 'author',
+ 'type': 'text'
+ },
+ {
+ 'id': 'published'
+ },
+ {
+ 'id': u'characters',
+ u'type': u'_text'
+ },
+ {
+ 'id': 'random_letters',
+ 'type': 'text[]'
+ }
+ ],
+ 'records': [
+ {
+ u'b\xfck': 'annakarenina',
+ 'author': 'tolstoy',
+ 'published': '2005-03-01',
+ 'nested': [
+ 'b',
+ {'moo': 'moo'}
+ ],
+ u'characters': [
+ u'Princess Anna',
+ u'Sergius'
+ ],
+ 'random_letters': [
+ 'a', 'e', 'x'
+ ]
+ },
+ {
+ u'b\xfck': 'warandpeace',
+ 'author': 'tolstoy',
+ 'nested': {'a': 'b'},
+ 'random_letters': [
+
+ ]
+ }
+ ]
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ res = app.get('/datastore/dump/{0}?limit=1&format=xml'.format(str(
+ resource['id'])))
content = res.body.decode('utf-8')
expected_content = (
u'\n'
@@ -193,3 +362,143 @@ def test_dump_xml(self):
u'\n'
)
assert_equals(content, expected_content)
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '3')
+ def test_dump_with_low_rows_max(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(12))
+
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+ def test_dump_pagination(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}'.format(str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(12))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '7')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+ def test_dump_pagination_csv_with_limit(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=11'.format(
+ str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(11))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '7')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
+ def test_dump_pagination_csv_with_limit_same_as_paginate(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=6'.format(
+ str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(6))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '6')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+ def test_dump_pagination_with_rows_max(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(7))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '6')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 6)
+ def test_dump_pagination_with_rows_max_same_as_paginate(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=7'.format(str(resource['id'])))
+ assert_equals(get_csv_record_values(response.body),
+ range(7))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '7')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+ def test_dump_pagination_json_with_limit(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=6&format=json'.format(
+ str(resource['id'])))
+ assert_equals(get_json_record_values(response.body),
+ range(6))
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '6')
+ @mock.patch('ckanext.datastore.controller.PAGINATE_BY', 5)
+ def test_dump_pagination_json_with_rows_max(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [{u'record': str(num)} for num in range(12)],
+ }
+ helpers.call_action('datastore_create', **data)
+
+ app = self._get_test_app()
+ response = app.get('/datastore/dump/{0}?limit=7&format=json'.format(
+ str(resource['id'])))
+ assert_equals(get_json_record_values(response.body),
+ range(7))
+
+
+def get_csv_record_values(response_body):
+ return [int(record.split(',')[1])
+ for record in response_body.split()[1:]]
+
+
+def get_json_record_values(response_body):
+ return [record[1]
+ for record in json.loads(response_body)['records']]
diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py
index d7bf64e3729..59c9bf1758c 100644
--- a/ckanext/datastore/tests/test_search.py
+++ b/ckanext/datastore/tests/test_search.py
@@ -9,17 +9,20 @@
import ckan.plugins as p
import ckan.lib.create_test_data as ctd
import ckan.model as model
+import ckan.logic as logic
import ckan.tests.legacy as tests
-from ckan.common import config
import ckanext.datastore.backend.postgres as db
-from ckanext.datastore.tests.helpers import extract, rebuild_all_dbs
+from ckanext.datastore.tests.helpers import (
+ extract, rebuild_all_dbs,
+ DatastoreFunctionalTestBase)
import ckan.tests.helpers as helpers
import ckan.tests.factories as factories
assert_equals = nose.tools.assert_equals
assert_raises = nose.tools.assert_raises
+assert_raises_regexp = nose.tools.assert_raises_regexp
assert_in = nose.tools.assert_in
@@ -594,15 +597,6 @@ def test_search_full_text_invalid_field_value(self):
res_dict = json.loads(res.body)
assert res_dict['success'] is False
- def test_search_table_metadata(self):
- data = {'resource_id': "_table_metadata"}
- postparams = '%s=1' % json.dumps(data)
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.post('/api/action/datastore_search', params=postparams,
- extra_environ=auth)
- res_dict = json.loads(res.body)
- assert res_dict['success'] is True
-
def test_search_is_unsuccessful_when_called_with_filters_not_as_dict(self):
data = {
'resource_id': self.data['resource_id'],
@@ -795,7 +789,6 @@ def setup_class(cls):
cls.data = {
'resource_id': resource.id,
'force': True,
- 'aliases': 'books4',
'fields': [{'id': u'b\xfck', 'type': 'text'},
{'id': 'author', 'type': 'text'},
{'id': 'published'}],
@@ -845,56 +838,6 @@ def teardown_class(cls):
rebuild_all_dbs(cls.Session)
p.unload('datastore')
- def test_validates_sql_has_a_single_statement(self):
- sql = 'SELECT * FROM public."{0}"; SELECT * FROM public."{0}";'.format(self.data['resource_id'])
- assert_raises(p.toolkit.ValidationError,
- helpers.call_action, 'datastore_search_sql', sql=sql)
-
- def test_works_with_semicolons_inside_strings(self):
- sql = 'SELECT * FROM public."{0}" WHERE "author" = \'foo; bar\''.format(self.data['resource_id'])
- helpers.call_action('datastore_search_sql', sql=sql)
-
- def test_invalid_statement(self):
- query = 'SELECT ** FROM foobar'
- data = {'sql': query}
- postparams = json.dumps(data)
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.post('/api/action/datastore_search_sql', params=postparams,
- extra_environ=auth, status=409)
- res_dict = json.loads(res.body)
- assert res_dict['success'] is False
-
- def test_select_basic(self):
- query = 'SELECT * FROM "{0}"'.format(self.data['resource_id'])
- data = {'sql': query}
- postparams = json.dumps(data)
- auth = {'Authorization': str(self.normal_user.apikey)}
- res = self.app.post('/api/action/datastore_search_sql', params=postparams,
- extra_environ=auth)
- res_dict = json.loads(res.body)
- assert res_dict['success'] is True
- result = res_dict['result']
- assert len(result['records']) == len(self.expected_records)
- for (row_index, row) in enumerate(result['records']):
- expected_row = self.expected_records[row_index]
- assert set(row.keys()) == set(expected_row.keys())
- for field in row:
- if field == '_full_text':
- for ft_value in expected_row['_full_text']:
- assert ft_value in row['_full_text']
- else:
- assert row[field] == expected_row[field]
-
- # test alias search
- query = 'SELECT * FROM "{0}"'.format(self.data['aliases'])
- data = {'sql': query}
- postparams = json.dumps(data)
- res = self.app.post('/api/action/datastore_search_sql', params=postparams,
- extra_environ=auth)
- res_dict_alias = json.loads(res.body)
-
- assert result['records'] == res_dict_alias['result']['records']
-
def test_select_where_like_with_percent(self):
query = 'SELECT * FROM public."{0}" WHERE "author" LIKE \'tol%\''.format(self.data['resource_id'])
data = {'sql': query}
@@ -1095,3 +1038,133 @@ def test_not_authorized_to_access_system_tables(self):
res_dict = json.loads(res.body)
assert res_dict['success'] is False
assert res_dict['error']['__type'] == 'Authorization Error'
+
+
+class TestDatastoreSQLFunctional(DatastoreFunctionalTestBase):
+ def test_validates_sql_has_a_single_statement(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {'the year': 2014},
+ {'the year': 2013},
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+ sql = 'SELECT * FROM public."{0}"; SELECT * FROM public."{0}";' \
+ .format(resource['id'])
+ with assert_raises_regexp(p.toolkit.ValidationError,
+ 'Query is not a single statement'):
+ helpers.call_action('datastore_search_sql', sql=sql)
+
+ def test_works_with_semicolons_inside_strings(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {'author': 'bob'},
+ {'author': 'jane'},
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+ sql = 'SELECT * FROM public."{0}" WHERE "author" = \'foo; bar\'' \
+ .format(resource['id'])
+ helpers.call_action('datastore_search_sql', sql=sql)
+
+ def test_invalid_statement(self):
+ sql = 'SELECT ** FROM foobar'
+ with assert_raises_regexp(
+ logic.ValidationError, 'syntax error at or near "FROM"'):
+ helpers.call_action('datastore_search_sql', sql=sql)
+
+ def test_select_basic(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {u'b\xfck': 'annakarenina',
+ 'author': 'tolstoy',
+ 'published': '2005-03-01',
+ 'nested': ['b', {'moo': 'moo'}]},
+ {u'b\xfck': 'warandpeace',
+ 'author': 'tolstoy',
+ 'nested': {'a': 'b'}}
+ ],
+ }
+ expected_records = [{u'_full_text': [u"'annakarenina'", u"'b'",
+ u"'moo'", u"'tolstoy'",
+ u"'2005'"],
+ u'_id': 1,
+ u'author': u'tolstoy',
+ u'b\xfck': u'annakarenina',
+ u'nested': [u'b', {u'moo': u'moo'}],
+ u'published': u'2005-03-01T00:00:00'},
+ {u'_full_text': [u"'tolstoy'", u"'warandpeac'",
+ u"'b'"],
+ u'_id': 2,
+ u'author': u'tolstoy',
+ u'b\xfck': u'warandpeace',
+ u'nested': {u'a': u'b'},
+ u'published': None}]
+ helpers.call_action('datastore_create', **data)
+ sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+ result = helpers.call_action('datastore_search_sql', sql=sql)
+ assert_equals(len(result['records']), 2)
+ for (row_index, row) in enumerate(result['records']):
+ expected_row = expected_records[row_index]
+ assert set(row.keys()) == set(expected_row.keys())
+ for field in row:
+ if field == '_full_text':
+ for ft_value in expected_row['_full_text']:
+ assert ft_value in row['_full_text']
+ else:
+ assert_equals(row[field], expected_row[field])
+ assert u'records_truncated' not in result
+
+ def test_alias_search(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'aliases': 'books4',
+ 'records': [
+ {u'b\xfck': 'annakarenina',
+ 'author': 'tolstoy',
+ 'published': '2005-03-01',
+ 'nested': ['b', {'moo': 'moo'}]},
+ {u'b\xfck': 'warandpeace',
+ 'author': 'tolstoy',
+ 'nested': {'a': 'b'}}
+ ],
+ }
+ helpers.call_action('datastore_create', **data)
+ sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+ result = helpers.call_action('datastore_search_sql', sql=sql)
+ sql = 'SELECT * FROM "books4"'
+ result_with_alias = helpers.call_action('datastore_search_sql',
+ sql=sql)
+ assert result['records'] == result_with_alias['records']
+
+ @helpers.change_config('ckan.datastore.search.rows_max', '2')
+ def test_search_limit(self):
+ resource = factories.Resource()
+ data = {
+ 'resource_id': resource['id'],
+ 'force': True,
+ 'records': [
+ {'the year': 2014},
+ {'the year': 2013},
+ {'the year': 2015},
+ {'the year': 2016},
+ ],
+ }
+ result = helpers.call_action('datastore_create', **data)
+ sql = 'SELECT * FROM "{0}"'.format(resource['id'])
+ result = helpers.call_action('datastore_search_sql', sql=sql)
+ assert_equals(len(result['records']), 2)
+ assert_equals([res[u'the year'] for res in result['records']],
+ [2014, 2013])
+ assert_equals(result[u'records_truncated'], True)
diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst
index 9ff1b764080..9305d5943e4 100644
--- a/doc/maintaining/configuration.rst
+++ b/doc/maintaining/configuration.rst
@@ -270,6 +270,24 @@ Default value: ``True``
This option allows you to disable the datastore_search_sql action function, and
corresponding API endpoint if you do not wish it to be activated.
+.. _ckan.datastore.search.rows_max:
+
+ckan.datastore.search.rows_max
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Example::
+
+ ckan.datastore.search.rows_max = 1000000
+
+Default value: ``32000``
+
+Maximum allowed value for the number of rows returned by the datastore.
+
+Specifically this limits:
+
+* ``datastore_search``'s ``limit`` parameter.
+* ``datastore_search_sql`` queries have this limit inserted.
+
Site Settings
-------------