diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index 84ad63a8ec0..22e598e9a4a 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -90,12 +90,16 @@ def _is_valid_table_name(name): return _is_valid_field_name(name) -def _validate_int(i, field_name): +def _validate_int(i, field_name, non_negative=False): try: - int(i) + i = int(i) except ValueError: raise ValidationError({ - 'field_name': ['{0} is not an integer'.format(i)] + field_name: ['{0} is not an integer'.format(i)] + }) + if non_negative and i < 0: + raise ValidationError({ + field_name: ['{0} is not a non-negative integer'.format(i)] }) @@ -206,7 +210,7 @@ def _guess_type(field): try: datetime.datetime.strptime(field, format) return 'timestamp' - except ValueError: + except (ValueError, TypeError): continue return 'text' @@ -837,8 +841,8 @@ def search_data(context, data_dict): limit = data_dict.get('limit', 100) offset = data_dict.get('offset', 0) - _validate_int(limit, 'limit') - _validate_int(offset, 'offset') + _validate_int(limit, 'limit', non_negative=True) + _validate_int(offset, 'offset', non_negative=True) if 'limit' in data_dict: data_dict['limit'] = int(limit) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 76809f762ed..34774b83ca6 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -14,7 +14,9 @@ def datastore_create(context, data_dict): The datastore_create action allows a user to post JSON data to be stored against a resource. This endpoint also supports altering tables, - aliases and indexes and bulk insertion. + aliases and indexes and bulk insertion. This endpoint can be called multiple + times to initially insert more data, add fields, change the aliases or indexes + as well as the primary keys. See :ref:`fields` and :ref:`records` for details on how to lay out records. 
@@ -31,16 +33,25 @@ def datastore_create(context, data_dict): :param indexes: indexes on table :type indexes: list or comma separated string - :returns: the newly created data object. + Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the existing + aliases or constraints. Setting ``records`` appends the provided records to the resource. + + **Results:** + + :returns: The newly created data object. :rtype: dictionary + See :ref:`fields` and :ref:`records` for details on how to lay out records. + ''' model = _get_or_bust(context, 'model') - id = _get_or_bust(data_dict, 'resource_id') + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] + res_id = _get_or_bust(data_dict, 'resource_id') - if not model.Resource.get(id): + if not model.Resource.get(res_id): raise p.toolkit.ObjectNotFound(p.toolkit._( - 'Resource "{0}" was not found.'.format(id) + 'Resource "{0}" was not found.'.format(res_id) )) p.toolkit.check_access('datastore_create', context, data_dict) @@ -89,10 +100,14 @@ def datastore_upsert(context, data_dict): Possible options are: upsert (default), insert, update :type method: string - :returns: the newly created data object. + **Results:** + + :returns: The modified data object. :rtype: dictionary ''' + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] @@ -124,10 +139,14 @@ def datastore_delete(context, data_dict): If missing delete whole table and all dependent views. :type filters: dictionary - :returns: original filters sent. + **Results:** + + :returns: Original filters sent. 
:rtype: dictionary ''' + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] @@ -176,6 +195,12 @@ def datastore_search(context, data_dict): e.g.: "fieldname1, fieldname2 desc" :type sort: string + Setting the ``plain`` flag to false enables the entire PostgreSQL `full text search query language`_. + + A listing of all available resources can be found at the alias ``_table_metadata``. + + .. _full text search query language: http://www.postgresql.org/docs/9.1/static/datatype-textsearch.html#DATATYPE-TSQUERY + **Results:** The result of this action is a dict with the following keys: @@ -195,6 +220,8 @@ def datastore_search(context, data_dict): :type records: list of dictionaries ''' + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') data_dict['connection_url'] = pylons.config.get('ckan.datastore.read_url', @@ -224,7 +251,8 @@ def datastore_search_sql(context, data_dict): The datastore_search_sql action allows a user to search data in a resource or connect multiple resources with join expressions. The underlying SQL engine is the - `PostgreSQL engine `_ + `PostgreSQL engine `_. + There is an enforced timeout on SQL queries to avoid an unintended DOS. .. note:: This action is only available when using PostgreSQL 9.X and using a read-only user on the database. It is not available in :ref:`legacy mode`. 
diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index 1168159032b..6f80fd49955 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -203,6 +203,15 @@ def test_search_invalid_limit(self): res_dict = json.loads(res.body) assert res_dict['success'] is False + data = {'resource_id': self.data['resource_id'], + 'limit': -1} + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_search', params=postparams, + extra_environ=auth, status=409) + res_dict = json.loads(res.body) + assert res_dict['success'] is False + def test_search_offset(self): data = {'resource_id': self.data['resource_id'], 'limit': 1, @@ -227,6 +236,15 @@ def test_search_invalid_offset(self): res_dict = json.loads(res.body) assert res_dict['success'] is False + data = {'resource_id': self.data['resource_id'], + 'offset': -1} + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_search', params=postparams, + extra_environ=auth, status=409) + res_dict = json.loads(res.body) + assert res_dict['success'] is False + def test_search_full_text(self): data = {'resource_id': self.data['resource_id'], 'q': 'annakarenina'} diff --git a/doc/datastore-api.rst b/doc/datastore-api.rst index 9b723c5170d..efefaef5116 100644 --- a/doc/datastore-api.rst +++ b/doc/datastore-api.rst @@ -32,6 +32,7 @@ There are several endpoints into the DataStore API, they are: ``datastore_search_htsql()``, see :ref:`datastore_search_htsql` at ``http://{YOUR-CKAN-INSTALLATION}/api/3/action/datastore_search_htsql`` +To understand the differences between the last three API endpoints, see :ref:`comparison_querying`. API Reference ============= @@ -39,13 +40,11 @@ API Reference The datastore related API actions are accessed via CKAN's :ref:`action-api`. 
When POSTing requests, parameters should be provided as JSON objects. -.. note:: Lists can always be expressed in different ways. It is possible to use lists, comma separated strings or single items. These are valid lists: ``['foo', 'bar']``, ``'foo, bar'``, ``"foo", "bar"`` and ``'foo'``. +.. note:: Lists can always be expressed in different ways. It is possible to use lists, comma separated strings or single items. These are valid lists: ``['foo', 'bar']``, ``'foo, bar'``, ``"foo", "bar"`` and ``'foo'``. Additionally, there are several ways to define a boolean value. ``True``, ``on`` and ``1`` are all valid boolean values. .. automodule:: ckanext.datastore.logic.action :members: - - .. _fields: Fields @@ -148,6 +147,7 @@ HTSQL Support The `ckanext-htsql `_ extension adds an API action that allows a user to search data in a resource using the `HTSQL `_ query expression language. Please refer to the extension documentation to know more. +.. _comparison_querying: Comparison of different querying methods ---------------------------------------- @@ -194,12 +194,14 @@ To the following endpoint: * Dataset Model Endpoint: ``http://{YOUR-CKAN-INSTALLATION}/api/rest/dataset`` -More details about creating a resource through the Data API are available on the :ref:`CKAN API page `. More information about the Datastore API can be found on the :doc:`datastore page `. +More details about creating a resource through the Data API are available on the :ref:`CKAN API page `. Examples -------- +.. note:: There is a special view that lists all available resources from the DataStore. It can be found at the alias ``_table_metadata``. + Some of the following commands require obtaining an :ref:`API Key `. 
cURL (or Browser) diff --git a/doc/datastore.rst b/doc/datastore.rst index c8388edea3b..374f21d9417 100644 --- a/doc/datastore.rst +++ b/doc/datastore.rst @@ -4,7 +4,8 @@ DataStore The CKAN DataStore provides a database for structured storage of data together with a powerful Web-accessible Data API, all seamlessly integrated into the CKAN -interface and authorization system. +interface and authorization system. At the same time, we kept the layer between the +underlying database and the user as thin as possible. The installation and set-up of the DataStore in outlined in :doc:`datastore-setup`.