From f8c807e73bfa9be85100677a19632ae3f85da154 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 4 Jun 2013 23:06:31 +0200 Subject: [PATCH 01/88] [#973] Make sure we always talk about datastore resource and not just resources, extend introduction to include motivation for the datastore --- ckanext/datastore/logic/action.py | 2 +- doc/data-viewer.rst | 1 + doc/datastore.rst | 51 +++++++++++++++++-------------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index c811fc06204..ca4abe67aa9 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -273,7 +273,7 @@ def datastore_search_sql(context, data_dict): There is an enforced timeout on SQL queries to avoid an unintended DOS. .. note:: This action is only available when using PostgreSQL 9.X and using a read-only user on the database. - It is not available in :ref:`legacy mode`. + It is not available in :ref:`legacy mode`. :param sql: a single sql select statement :type sql: string diff --git a/doc/data-viewer.rst b/doc/data-viewer.rst index 2a446de4724..bbfa8500bcc 100644 --- a/doc/data-viewer.rst +++ b/doc/data-viewer.rst @@ -76,6 +76,7 @@ Note that these documents will be directly linked by the browser, so the way in which they are shown may vary. If you want to ensure for instance that XML based documents are correctly previewed, have a look at `Viewing highlighted XML, JSON and plain text data`_. +.. _data-explorer: Viewing structured data: the Data Explorer ------------------------------------------ diff --git a/doc/datastore.rst b/doc/datastore.rst index 734e8f1fb8f..1876d9d4a11 100644 --- a/doc/datastore.rst +++ b/doc/datastore.rst @@ -2,19 +2,24 @@ DataStore Extension =================== -.. todo:: +The CKAN DataStore provides an *ad hoc* database for structured storage of data together +with a powerful Web-accessible Data API, all seamlessly integrated into the CKAN +API and authorization system. At the same time, we kept the layer between the +underlying database and the user as thin as possible. - What features does the datastore actually provide that users care about? - Why would they want to use it? +The CKAN DataStore offers an API for reading, searching and filtering data without +the need to download the entire file first. The DataStore is an ad hoc database which +means that it is a collection of tables with unknown relationships. This allows +you to search in one DataStore resource (a *table* in the database) as well as queries +across DataStore resources. - - API for reading, writing data without downloading, uploading entire file - - Enables Recline previews - - API for searching data, including search across resources +Data can be written incrementally to the DataStore through the API. New data can be +inserted, existing data can be updated or deleted. You can also add a new column to +an existing table even if the DataStore resource already contains some data. -The CKAN DataStore provides a database for structured storage of data together -with a powerful Web-accessible Data API, all seamlessly integrated into the CKAN -interface and authorization system. At the same time, we kept the layer between the -underlying database and the user as thin as possible. +A DataStore resource can not be created on its own. It is always required to have an +associated CKAN resource. If data is stored in the DataStore, it will automatically be +previewed by the :ref:`recline preview extension `. .. contents:: :depth: 1 @@ -45,7 +50,7 @@ Setting up the DataStore DataStore on versions prior to 9.0 (for example 8.4). However, the :meth:`~ckanext.datastore.logic.action.datastore_search_sql` will not be available and the set-up is slightly different. Make sure, you read - :ref:`legacy_mode` for more details. + :ref:`legacy-mode` for more details. .. warning:: @@ -63,7 +68,7 @@ Add the ``datastore`` plugin to your CKAN config file:: .. warning:: Make sure that you follow the steps in `Set Permissions`_ below correctly. Wrong settings could lead to serious security issues. -The DataStore requires a separate PostgreSQL database to save the resources to. +The DataStore requires a separate PostgreSQL database to save the DataStore resources to. List existing databases:: @@ -118,7 +123,7 @@ Replace ``pass`` with the passwords you created for your |database_user| and Set Permissions --------------- -.. tip:: See :ref:`legacy_mode` if these steps continue to fail or seem too complicated for your set-up. However, keep in mind that the legacy mode is limited in its capabilities. +.. tip:: See :ref:`legacy-mode` if these steps continue to fail or seem too complicated for your set-up. However, keep in mind that the legacy mode is limited in its capabilities. Once the DataStore database and the users are created, the permissions on the DataStore and CKAN database have to be set. Since there are different set-ups, there are different ways of setting the permissions. Only **one** of the options should be used. @@ -170,19 +175,19 @@ Copy the ``set_permissions.sql`` file to the server that the database runs on. M ================== The DataStore is now set-up. To test the set-up, (re)start CKAN and run the -following command to list all resources that are in the DataStore:: +following command to list all DataStore resources:: curl -X GET "http://127.0.0.1:5000/api/3/action/datastore_search?resource_id=_table_metadata" This should return a JSON page without errors. -To test the whether the set-up allows writing, you can create a new resource in -the DataStore. To do so, run the following command:: +To test the whether the set-up allows writing, you can create a new DataStore resource. +To do so, run the following command:: curl -X POST http://127.0.0.1:5000/api/3/action/datastore_create -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}", "fields": [ {"id": "a"}, {"id": "b"} ], "records": [ { "a": 1, "b": "xyz"}, {"a": 2, "b": "zzz"} ]}' -Replace ``{YOUR-API-KEY}`` with a valid API key and ``{RESOURCE-ID}`` with a -resource id of an existing CKAN resource. +Replace ``{YOUR-API-KEY}`` with a valid API key and ``{RESOURCE-ID}`` with the +id of an existing CKAN resource. A table named after the resource id should have been created on your DataStore database. Visiting this URL should return a response from the DataStore with @@ -192,12 +197,12 @@ the records inserted above:: You can now delete the DataStore table with:: - curl -X POST http://127.0.0.1:5000/api/3/action/datastore_delete -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}"}' + curl -X POST http://127.0.0.1:5000/api/3/action/datastore_delete -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}"}' To find out more about the DataStore API, see `The DataStore API`_. -.. _legacy_mode: +.. _legacy-mode: Legacy mode: use the DataStore with old PostgreSQL versions =========================================================== @@ -278,7 +283,7 @@ Example:: Records ------- -A record is the data to be inserted in a table and is defined as follows:: +A record is the data to be inserted in a DataStore resource and is defined as follows:: { "": # data to be set @@ -332,7 +337,7 @@ You can find more information about the formatting of dates in the `date/time ty .. _date/time types section of the PostgreSQL documentation: http://www.postgresql.org/docs/9.1/static/datatype-datetime.html -.. _resource_aliases: +.. _resource-aliases: Resource aliases ---------------- @@ -372,7 +377,7 @@ Internal structure of the database The DataStore is a thin layer on top of a PostgreSQL database. Each DataStore resource belongs to a CKAN resource. The name of a table in the DataStore is always the resource id of the CKAN resource for the data. -As explained in :ref:`resource_aliases`, a resource can have mnemonic aliases which are stored as views in the database. +As explained in :ref:`resource-aliases`, a resource can have mnemonic aliases which are stored as views in the database. All aliases (views) and resources (tables respectively relations) of the DataStore can be found in a special view called ``_table_metadata``. To access the list, open ``http://{YOUR-CKAN-INSTALLATION}/api/3/action/datastore_search?resource_id=_table_metadata``. From 8d03df644d331ea4a84d22bcf311a9c5594397d9 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 12 Jun 2013 19:32:09 +0200 Subject: [PATCH 02/88] [#981] Allow users to create a ckan resource and a datastore resource with one call --- ckanext/datastore/logic/action.py | 29 ++++++++++++++++++++++ ckanext/datastore/logic/schema.py | 4 +++- ckanext/datastore/plugin.py | 1 - ckanext/datastore/tests/test_create.py | 33 ++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 9240e642dee..08e4862bc49 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -6,6 +6,7 @@ import ckan.lib.navl.dictization_functions import ckan.logic as logic import ckan.plugins as p +import ckan.lib.helpers as h import ckanext.datastore.db as db import ckanext.datastore.logic.schema as dsschema @@ -25,10 +26,16 @@ def datastore_create(context, data_dict): times to initially insert more data, add fields, change the aliases or indexes as well as the primary keys. + To create a datastore resource and a CKAN resource at the same time, + provide a valid ``package_id`` and omit the ``resource_id``. + See :ref:`fields` and :ref:`records` for details on how to lay out records. :param resource_id: resource id that the data is going to be stored against. :type resource_id: string + :param package_id: package in which a new resource will be crated. + use instead of ``resource_id``. + :type package_id: string :param aliases: names for read only aliases of the resource. :type aliases: list or comma separated string :param fields: fields/columns and their extra metadata. @@ -61,6 +68,28 @@ def datastore_create(context, data_dict): p.toolkit.check_access('datastore_create', context, data_dict) + if 'package_id' in data_dict and 'resource_id' in data_dict: + raise p.toolkit.ValidationError({ + 'resource_id': ['package_id cannot be used with resource_id'] + }) + + if not 'package_id' in data_dict and not 'resource_id' in data_dict: + raise p.toolkit.ValidationError({ + 'resource_id': ['resource_id or package_id required'] + }) + + # create ckan resource if package_id was provided + if 'package_id' in data_dict: + res = p.toolkit.get_action('resource_create')(context, { + 'package_id': data_dict['package_id'], + 'url': '_tmp' + }) + res['url'] = h.url_for( + controller='ckanext.datastore.controller:DatastoreController', + action='dump', resource_id=res['id']) + p.toolkit.get_action('resource_update')(context, res) + data_dict['resource_id'] = res['id'] + data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] # validate aliases diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index 3c4e0d95986..824f7cbf1fd 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -8,6 +8,7 @@ not_missing = get_validator('not_missing') not_empty = get_validator('not_empty') resource_id_exists = get_validator('resource_id_exists') +package_id_exists = get_validator('package_id_exists') ignore_missing = get_validator('ignore_missing') empty = get_validator('empty') boolean_validator = get_validator('boolean_validator') @@ -66,7 +67,8 @@ def json_validator(value, context): def datastore_create_schema(): schema = { - 'resource_id': [not_missing, unicode, resource_id_exists], + 'resource_id': [ignore_missing, unicode, resource_id_exists], + 'package_id': [ignore_missing, unicode, package_id_exists], 'id': [ignore_missing], 'aliases': [ignore_missing, list_of_strings_or_string], 'fields': { diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 820419a4a81..0f2658a8749 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -248,7 +248,6 @@ def get_auth_functions(self): 'datastore_change_permissions': auth.datastore_change_permissions} def before_map(self, m): - print "Load mapping" m.connect('/datastore/dump/{resource_id}', controller='ckanext.datastore.controller:DatastoreController', action='dump') diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 02d38cd1824..5eeca324bc6 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -525,6 +525,39 @@ def test_create_basic(self): assert res_dict['success'] is True, res_dict + def test_create_ckan_resource_in_package(self): + package = model.Package.get('annakarenina') + data = { + 'package_id': package.id + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_create', params=postparams, + extra_environ=auth, status=200) + res_dict = json.loads(res.body) + + assert 'resource_id' in res_dict['result'] + assert len(model.Package.get('annakarenina').resources) == 3 + + res = tests.call_action_api( + self.app, 'resource_show', id=res_dict['result']['resource_id']) + assert res['url'] == '/datastore/dump/' + res['id'], res + + def test_cant_provide_resource_and_package_id(self): + package = model.Package.get('annakarenina') + resource = package.resources[0] + data = { + 'resource_id': resource.id, + 'package_id': package.id + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_create', params=postparams, + extra_environ=auth, status=409) + res_dict = json.loads(res.body) + + assert res_dict['error']['__type'] == 'Validation Error' + def test_guess_types(self): resource = model.Package.get('annakarenina').resources[1] From 397507fcb256d4a74fe5c7d2b35aa62b1740fc10 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 12 Jun 2013 20:09:05 +0200 Subject: [PATCH 03/88] [#981] Define datapusher_submit action --- ckanext/datastore/logic/action.py | 14 ++++++++++++++ ckanext/datastore/plugin.py | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 08e4862bc49..d01a6aa658f 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -411,6 +411,20 @@ def datastore_make_public(context, data_dict): db.make_public(context, data_dict) +def datapusher_submit(context, data_dict): + ''' Submit a job to the datapusher. The datapusher is a service that + imports tabular data into the datastore. + + :param resource_id: The resource id of the resource that the data + should be imported in. The resource's URL will be used to get the data. + :type resource_id: string + :param set_url_to_dump: If set to true, the URL of the resource will be set + to the :ref:`datastore dump ` URL after the data has been imported. + :type set_url_to_dump: boolean + ''' + pass + + def _resource_exists(context, data_dict): # Returns true if the resource exists in CKAN and in the datastore model = _get_or_bust(context, 'model') diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 0f2658a8749..52ca16a468c 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -232,7 +232,8 @@ def get_actions(self): actions = {'datastore_create': action.datastore_create, 'datastore_upsert': action.datastore_upsert, 'datastore_delete': action.datastore_delete, - 'datastore_search': action.datastore_search} + 'datastore_search': action.datastore_search, + 'datapusher_submit': action.datapusher_submit} if not self.legacy_mode: actions.update({ 'datastore_search_sql': action.datastore_search_sql, From 701aa743db6674094c66cf4a3e8cdba08b01faa6 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 12 Jun 2013 20:11:42 +0200 Subject: [PATCH 04/88] [#981] Disable test until #547 resolves issues with routes in plugins --- ckanext/datastore/tests/test_create.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 5eeca324bc6..8f6efe36cfa 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -541,7 +541,8 @@ def test_create_ckan_resource_in_package(self): res = tests.call_action_api( self.app, 'resource_show', id=res_dict['result']['resource_id']) - assert res['url'] == '/datastore/dump/' + res['id'], res + # disabled until #547 fixes problems with the plugins in tests + #assert res['url'] == '/datastore/dump/' + res['id'], res def test_cant_provide_resource_and_package_id(self): package = model.Package.get('annakarenina') From db00f0970c2731d8c096156bb0b7a797c204aaf2 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 2 Jul 2013 16:17:33 -0400 Subject: [PATCH 05/88] [#1078] use SOLR data_dict instead of package_dictize when possible --- ckan/logic/action/get.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 829b6cc5f65..c5c14e0c438 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -752,7 +752,10 @@ def package_show(context, data_dict): _check_access('package_show', context, data_dict) - package_dict = model_dictize.package_dictize(pkg, context) + if 'revision_id' not in context and 'revision_date' not in context: + package_dict = json.loads(search.show(name_or_id)['data_dict']) + else: + package_dict = model_dictize.package_dictize(pkg, context) for item in plugins.PluginImplementations(plugins.IPackageController): item.read(pkg) From df8cdd08e564e0106b9f3fc00664d2c3c0e257fa Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 2 Jul 2013 17:44:45 -0400 Subject: [PATCH 06/88] [#1078] fall back to package_dictize on SearchError --- ckan/logic/action/get.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index c5c14e0c438..c3673cc2c00 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -752,9 +752,13 @@ def package_show(context, data_dict): _check_access('package_show', context, data_dict) + package_dict = None if 'revision_id' not in context and 'revision_date' not in context: - package_dict = json.loads(search.show(name_or_id)['data_dict']) - else: + try: + package_dict = json.loads(search.show(name_or_id)['data_dict']) + except search.SearchError: + pass + if not package_dict: package_dict = model_dictize.package_dictize(pkg, context) for item in plugins.PluginImplementations(plugins.IPackageController): From e55380470036ac85c1ff78281835744266363a5e Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Wed, 3 Jul 2013 14:51:14 -0400 Subject: [PATCH 07/88] [#1078] treat socket errors as solr lookup failure --- ckan/logic/action/get.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index c3673cc2c00..d5f4f189106 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -4,6 +4,7 @@ import logging import json import datetime +import socket from pylons import config import sqlalchemy @@ -756,7 +757,7 @@ def package_show(context, data_dict): if 'revision_id' not in context and 'revision_date' not in context: try: package_dict = json.loads(search.show(name_or_id)['data_dict']) - except search.SearchError: + except (search.SearchError, socket.error): pass if not package_dict: package_dict = model_dictize.package_dictize(pkg, context) From c07c79baaee97513234ffcd41c383bffa2c6f87d Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Wed, 3 Jul 2013 16:42:21 -0400 Subject: [PATCH 08/88] [#1078] don't use cache if schema passed --- ckan/logic/action/get.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index d5f4f189106..cc07cf9c07b 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -754,7 +754,8 @@ def package_show(context, data_dict): _check_access('package_show', context, data_dict) package_dict = None - if 'revision_id' not in context and 'revision_date' not in context: + no_cache_context = ['revision_id', 'revision_date', 'schema'] + if not any(k in context for k in no_cache_context): try: package_dict = json.loads(search.show(name_or_id)['data_dict']) except (search.SearchError, socket.error): From 484947a24d36c4c98e1589e9c2b34c2459133870 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Mon, 8 Jul 2013 13:58:58 -0400 Subject: [PATCH 09/88] [#1078] don't use cache if different revision --- ckan/logic/action/get.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index cc07cf9c07b..49b155c65dc 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -758,6 +758,8 @@ def package_show(context, data_dict): if not any(k in context for k in no_cache_context): try: package_dict = json.loads(search.show(name_or_id)['data_dict']) + if pkg.revision_id != package_dict.get('revision_id'): + package_dict = None except (search.SearchError, socket.error): pass if not package_dict: From 879b9376005177e1805ba305a583e771fbe7f17f Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Mon, 8 Jul 2013 17:29:20 -0400 Subject: [PATCH 10/88] [#1078] don't use cache when indexing --- ckan/lib/search/__init__.py | 15 +++++++-------- ckan/logic/action/get.py | 4 +++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index d7afc157e3a..1d6762d68a4 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -123,7 +123,8 @@ def notify(self, entity, operation): dispatch_by_operation( entity.__class__.__name__, logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + {'model': model, 'ignore_auth': True, 'validate': False, + 'use_cache': False}, {'id': entity.id}), operation ) @@ -147,18 +148,18 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def log.info("Rebuilding search index...") package_index = index_for(model.Package) + context = {'model': model, 'ignore_auth': True, 'validate': False, + 'use_cache': False} if package_id: - pkg_dict = logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + pkg_dict = logic.get_action('package_show')(context, {'id': package_id}) log.info('Indexing just package %r...', pkg_dict['name']) package_index.remove_dict(pkg_dict) package_index.insert_dict(pkg_dict) elif package_ids: for package_id in package_ids: - pkg_dict = logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + pkg_dict = logic.get_action('package_show')(context, {'id': package_id}) log.info('Indexing just package %r...', pkg_dict['name']) package_index.update_dict(pkg_dict, True) @@ -185,9 +186,7 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def for pkg_id in package_ids: try: package_index.update_dict( - logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, - 'validate': False}, + logic.get_action('package_show')(context, {'id': pkg_id} ), defer_commit diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 49b155c65dc..8d24ab9719b 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -755,7 +755,9 @@ def package_show(context, data_dict): package_dict = None no_cache_context = ['revision_id', 'revision_date', 'schema'] - if not any(k in context for k in no_cache_context): + use_cache = (context.get('use_cache', True) + and not any(k in context for k in no_cache_context)) + if use_cache: try: package_dict = json.loads(search.show(name_or_id)['data_dict']) if pkg.revision_id != package_dict.get('revision_id'): From db77781b1ab5bc72f89f3a40d6e2256535e581c3 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Mon, 8 Jul 2013 18:19:16 -0400 Subject: [PATCH 11/88] [#1078] fix for before_view with cached dataset --- ckan/logic/action/get.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 8d24ab9719b..859f1b82507 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -764,8 +764,12 @@ def package_show(context, data_dict): package_dict = None except (search.SearchError, socket.error): pass + if not package_dict: package_dict = model_dictize.package_dictize(pkg, context) + elif context.get('for_view'): + for item in plugins.PluginImplementations(plugins.IPackageController): + package_dict = item.before_view(package_dict) for item in plugins.PluginImplementations(plugins.IPackageController): item.read(pkg) From 557b028b08e1054b41d9fbae7550598b08fe5f09 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 9 Jul 2013 09:18:06 -0400 Subject: [PATCH 12/88] [#1078] move before_view plugin from dictize to action, feels more consistent --- ckan/lib/dictization/model_dictize.py | 4 ---- ckan/logic/action/get.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 3bb20695820..e0b0c66a871 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -300,10 +300,6 @@ def package_dictize(pkg, context): result_dict['metadata_created'] = pkg.metadata_created.isoformat() \ if pkg.metadata_created else None - if context.get('for_view'): - for item in plugins.PluginImplementations( plugins.IPackageController): - result_dict = item.before_view(result_dict) - return result_dict def _get_members(context, group, member_type): diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 859f1b82507..e4e666af067 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -767,7 +767,8 @@ def package_show(context, data_dict): if not package_dict: package_dict = model_dictize.package_dictize(pkg, context) - elif context.get('for_view'): + + if context.get('for_view'): for item in plugins.PluginImplementations(plugins.IPackageController): package_dict = item.before_view(package_dict) From 9af97400264eb7030b08733cc76246dfd1d100b9 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 2 Jul 2013 17:52:41 -0400 Subject: [PATCH 13/88] [#1078] store show_package_schema-validated data_dict in SOLR --- ckan/config/solr/schema-2.0.xml | 1 + ckan/lib/search/index.py | 13 ++++++++++++ ckan/logic/action/get.py | 30 ++++++++++++++++++---------- ckanext/multilingual/solr/schema.xml | 1 + 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/ckan/config/solr/schema-2.0.xml b/ckan/config/solr/schema-2.0.xml index 7e681738c5b..26b8f0dedd6 100644 --- a/ckan/config/solr/schema-2.0.xml +++ b/ckan/config/solr/schema-2.0.xml @@ -129,6 +129,7 @@ + diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index 21b301e6f6d..bc654006de7 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -18,9 +18,13 @@ from ckan.plugins import (PluginImplementations, IPackageController) import ckan.logic as logic +import ckan.lib.plugins as lib_plugins +import ckan.lib.navl.dictization_functions log = logging.getLogger(__name__) +_validate = ckan.lib.navl.dictization_functions.validate + TYPE_FIELD = "entity_type" PACKAGE_TYPE = "package" KEY_CHARS = string.digits + string.letters + "_-" @@ -102,8 +106,17 @@ def update_dict(self, pkg_dict, defer_commit=False): def index_package(self, pkg_dict, defer_commit=False): if pkg_dict is None: return + pkg_dict['data_dict'] = json.dumps(pkg_dict) + if config.get('ckan.cache_validated_datasets', True): + package_plugin = lib_plugins.lookup_package_plugin(pkg_dict['type']) + + schema = package_plugin.show_package_schema() + validated_pkg_dict, errors = _validate(pkg_dict, schema, { + 'model': model, 'session': model.Session}) + pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict) + # add to string field for sorting title = pkg_dict.get('title') if title: diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index e4e666af067..56512bf040d 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -759,14 +759,22 @@ def package_show(context, data_dict): and not any(k in context for k in no_cache_context)) if use_cache: try: - package_dict = json.loads(search.show(name_or_id)['data_dict']) - if pkg.revision_id != package_dict.get('revision_id'): - package_dict = None + search_result = search.show(name_or_id) except (search.SearchError, socket.error): pass + else: + if 'validated_data_dict' in search_result: + package_dict = json.loads(search_result['validated_data_dict']) + package_dict_validated = True + else: + package_dict = json.loads(search_result['data_dict']) + package_dict_validated = False + if pkg.revision_id != package_dict.get('revision_id'): + package_dict = None if not package_dict: package_dict = model_dictize.package_dictize(pkg, context) + package_dict_validated = False if context.get('for_view'): for item in plugins.PluginImplementations(plugins.IPackageController): @@ -775,14 +783,16 @@ def package_show(context, data_dict): for item in plugins.PluginImplementations(plugins.IPackageController): item.read(pkg) - package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) - if 'schema' in context: - schema = context['schema'] - else: - schema = package_plugin.show_package_schema() + if not package_dict_validated: + package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) + if 'schema' in context: + schema = context['schema'] + else: + schema = package_plugin.show_package_schema() - if schema and context.get('validate', True): - package_dict, errors = _validate(package_dict, schema, context=context) + if schema and context.get('validate', True): + package_dict, errors = _validate(package_dict, schema, + context=context) for item in plugins.PluginImplementations(plugins.IPackageController): item.after_show(context, package_dict) diff --git a/ckanext/multilingual/solr/schema.xml b/ckanext/multilingual/solr/schema.xml index 82f3e40769b..10f08696681 100644 --- a/ckanext/multilingual/solr/schema.xml +++ b/ckanext/multilingual/solr/schema.xml @@ -436,6 +436,7 @@ + From a00e218c50bf5027de9a4bcaeda1b683094da92f Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 9 Jul 2013 11:08:33 -0400 Subject: [PATCH 14/88] [#1078] package index: treat missing type as default type --- ckan/lib/search/index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index bc654006de7..5b73264648f 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -110,7 +110,8 @@ def index_package(self, pkg_dict, defer_commit=False): pkg_dict['data_dict'] = json.dumps(pkg_dict) if config.get('ckan.cache_validated_datasets', True): - package_plugin = lib_plugins.lookup_package_plugin(pkg_dict['type']) + package_plugin = lib_plugins.lookup_package_plugin( + pkg_dict.get('type')) schema = package_plugin.show_package_schema() validated_pkg_dict, errors = _validate(pkg_dict, schema, { From 458283baf64f510e8391a149e9728849300c520b Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 9 Jul 2013 13:41:59 -0400 Subject: [PATCH 15/88] [#1078] validated data dict store missing as null --- ckan/lib/navl/dictization_functions.py | 9 +++++++++ ckan/lib/search/index.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ckan/lib/navl/dictization_functions.py b/ckan/lib/navl/dictization_functions.py index 5eadebdc30b..e05ad851287 100644 --- a/ckan/lib/navl/dictization_functions.py +++ b/ckan/lib/navl/dictization_functions.py @@ -1,6 +1,7 @@ import copy import formencode as fe import inspect +import json from pylons import config from ckan.common import _ @@ -402,3 +403,11 @@ def unflatten(data): unflattened[key] = [unflattened[key][s] for s in sorted(unflattened[key])] return unflattened + + +class MissingNullEncoder(json.JSONEncoder): + '''json encoder that treats missing objects as null''' + def default(self, obj): + if isinstance(obj, Missing): + return None + return json.JSONEncoder.default(self, obj) diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index 5b73264648f..1f6d8bed8ae 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -116,7 +116,8 @@ def index_package(self, pkg_dict, defer_commit=False): schema = package_plugin.show_package_schema() validated_pkg_dict, errors = _validate(pkg_dict, schema, { 'model': model, 'session': model.Session}) - pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict) + pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict, + cls=ckan.lib.navl.dictization_functions.MissingNullEncoder) # add to string field for sorting title = pkg_dict.get('title') From f239d8412a70c7c4bccb7cbeac06648e412874bd Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Tue, 9 Jul 2013 13:53:54 -0400 Subject: [PATCH 16/88] [#1078] data_dict may still be used when schema passed --- ckan/logic/action/get.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 56512bf040d..fc8eee34bf7 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -754,16 +754,17 @@ def package_show(context, data_dict): _check_access('package_show', context, data_dict) package_dict = None - no_cache_context = ['revision_id', 'revision_date', 'schema'] use_cache = (context.get('use_cache', True) - and not any(k in context for k in no_cache_context)) + and not 'revision_id' in context + and not 'revision_date' in context) if use_cache: try: search_result = search.show(name_or_id) except (search.SearchError, socket.error): pass else: - if 'validated_data_dict' in search_result: + use_validated_cache = 'schema' not in context + if use_validated_cache and 'validated_data_dict' in search_result: package_dict = json.loads(search_result['validated_data_dict']) package_dict_validated = True else: From 0b4bef130b9c9efc1ec57789d9396f8ac55c4a5e Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 17 Jul 2013 11:48:18 +0200 Subject: [PATCH 17/88] [#973] Improve references in data viewer docs --- doc/data-viewer.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/data-viewer.rst b/doc/data-viewer.rst index bbfa8500bcc..51494c99593 100644 --- a/doc/data-viewer.rst +++ b/doc/data-viewer.rst @@ -19,11 +19,11 @@ use a custom widget. The data preview functionality that is provided by CKAN is described in the following sections: -* `Viewing images and text files`_ -* `Viewing structured data: the Data Explorer`_ -* `Viewing JSON data`_ -* `Viewing PDF documents`_ -* `Embedding Previews In Other Web Pages`_ +* :ref:`image-preview` +* :ref:`data-explorer` +* :ref:`text-preview` +* :ref:`pdf-preview` +* :ref:`embed-previews` These sections list the resource formats that each extension can preview and provide instructions for how to enable each extension. @@ -33,6 +33,8 @@ For more information on this topic see `Writing Extensions `_. +.. _image-preview: + Viewing images and text files ----------------------------- @@ -76,6 +78,7 @@ Note that these documents will be directly linked by the browser, so the way in which they are shown may vary. If you want to ensure for instance that XML based documents are correctly previewed, have a look at `Viewing highlighted XML, JSON and plain text data`_. + .. _data-explorer: Viewing structured data: the Data Explorer @@ -113,6 +116,8 @@ Or: reliable than viewing data that is in the DataStore. +.. _text-preview: + Viewing highlighted XML, JSON and plain text data ------------------------------------------------- @@ -146,6 +151,8 @@ enabled as well `same origin policy `_). +.. _pdf-preview: + Viewing PDF documents --------------------- @@ -161,6 +168,8 @@ the `Filestore `_) as well as any external ``pdf`` documents. This extension uses Mozilla's `pdf.js `_ library. +.. _embed-previews: + Embedding Previews In Other Web Pages ------------------------------------- From 668d1991ae9a21b2150a3efa049baada8b56e705 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 17 Jul 2013 12:07:26 +0200 Subject: [PATCH 18/88] [#973] Restructured Datastore description. Introduction is much simpler and shorter. --- doc/datastore.rst | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/doc/datastore.rst b/doc/datastore.rst index 1876d9d4a11..dfa8cb3e871 100644 --- a/doc/datastore.rst +++ b/doc/datastore.rst @@ -2,24 +2,18 @@ DataStore Extension =================== -The CKAN DataStore provides an *ad hoc* database for structured storage of data together -with a powerful Web-accessible Data API, all seamlessly integrated into the CKAN -API and authorization system. At the same time, we kept the layer between the -underlying database and the user as thin as possible. -The CKAN DataStore offers an API for reading, searching and filtering data without -the need to download the entire file first. The DataStore is an ad hoc database which -means that it is a collection of tables with unknown relationships. This allows -you to search in one DataStore resource (a *table* in the database) as well as queries -across DataStore resources. +The CKAN DataStore extension provides an *ad hoc* database for storage of structured data from +CKAN resources. Data can be pulled out of resource files and stored in +the DataStore. -Data can be written incrementally to the DataStore through the API. New data can be -inserted, existing data can be updated or deleted. You can also add a new column to -an existing table even if the DataStore resource already contains some data. +When a resource is added to the DataStore, you get: -A DataStore resource can not be created on its own. It is always required to have an -associated CKAN resource. If data is stored in the DataStore, it will automatically be -previewed by the :ref:`recline preview extension `. +* Automatic data previews on the resource's page, using the :ref:`Data Explorer extension ` +* The `The DataStore API`_: search, filter and update the data, without having to download + and upload the entire data file + +The DataStore is integrated into the :doc:`CKAN API ` and authorization system. .. contents:: :depth: 1 @@ -229,9 +223,24 @@ There is no need for a read-only user or special permissions. Therefore the lega The DataStore API ----------------- -The DataStore API allows tabular data to be stored inside CKAN quickly and -easily. Each resource in a CKAN instance can have an associated DataStore -table. The API for using the DataStore is outlined below. +The CKAN DataStore offers an API for reading, searching and filtering data without +the need to download the entire file first. The DataStore is an ad hoc database which +means that it is a collection of tables with unknown relationships. This allows +you to search in one DataStore resource (a *table* in the database) as well as queries +across DataStore resources. + +Data can be written incrementally to the DataStore through the API. New data can be +inserted, existing data can be updated or deleted. You can also add a new column to +an existing table even if the DataStore resource already contains some data. + +You will notice that we tried to keep the layer between the underlying PostgreSQL +database and the API as thin as possible to allow you to use the features you would +expect from a powerful database management system. + +A DataStore resource can not be created on its own. It is always required to have an +associated CKAN resource. If data is stored in the DataStore, it will automatically be +previewed by the :ref:`recline preview extension `. + Making a DataStore API Request ============================== From e1ccc77c87016caeb37365f765d84be95a84c1f5 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 17 Jul 2013 12:12:44 +0200 Subject: [PATCH 19/88] [#973] Fix merge --- doc/data-viewer.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/data-viewer.rst b/doc/data-viewer.rst index af88cc9f701..50f9169ec22 100644 --- a/doc/data-viewer.rst +++ b/doc/data-viewer.rst @@ -144,7 +144,7 @@ set to one of the resource formats from above (case insensitive). .. seealso:: - :ref:`The resourceproxy extension ` + :ref:`The resourceproxy extension ` If you want to preview linked-to text files (and not only files that have been uploaded to CKAN) you need to enable the ``resource_proxy`` extension @@ -168,7 +168,7 @@ have been added to a CKAN instance. This extension uses Mozilla's `pdf.js .. seealso:: - :ref:`The resourceproxy extension ` + :ref:`The resourceproxy extension ` If you want to preview linked-to PDF files (and not only files that have been uploaded to CKAN) you need to enable the ``resource_proxy`` extension From 9568bb30408b31a6fabc1c520386bda3e46a8497 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 17 Jul 2013 14:43:48 +0200 Subject: [PATCH 20/88] [#1106] Don't accept invalid URLs in resource proxy --- ckanext/resourceproxy/controller.py | 7 ++++++- ckanext/resourceproxy/tests/test_proxy.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ckanext/resourceproxy/controller.py b/ckanext/resourceproxy/controller.py index 81a8123e3b1..0e8fd9bfd11 100644 --- a/ckanext/resourceproxy/controller.py +++ b/ckanext/resourceproxy/controller.py @@ -1,4 +1,5 @@ from logging import getLogger +import urlparse import requests @@ -15,13 +16,17 @@ def proxy_resource(context, data_dict): ''' Chunked proxy for resources. To make sure that the file is not too large, first, we try to get the content length from the headers. If the headers to not contain a content length (if it is a chinked - response), we only transfer as long as the transfered data is less + response), we only transfer as long as the transferred data is less than the maximum file size. ''' resource_id = data_dict['resource_id'] log.info('Proxify resource {id}'.format(id=resource_id)) resource = logic.get_action('resource_show')(context, {'id': resource_id}) url = resource['url'] + parts = urlparse.urlsplit(url) + if not parts.scheme or not parts.netloc: + base.abort(409, detail='Invalid URL.') + try: # first we try a HEAD request which may not be supported did_get = False diff --git a/ckanext/resourceproxy/tests/test_proxy.py b/ckanext/resourceproxy/tests/test_proxy.py index 83aa033e7c4..419c75440d8 100644 --- a/ckanext/resourceproxy/tests/test_proxy.py +++ b/ckanext/resourceproxy/tests/test_proxy.py @@ -130,7 +130,17 @@ def test_large_file_streaming(self): assert result.status == 409, result.status assert 'too large' in result.body, result.body - def test_resource_proxy_non_existent(self): + @httpretty.activate + def test_invalid_url(self): + self.data_dict = set_resource_url('javascript:downloadFile(foo)') + + proxied_url = proxy.get_proxified_resource_url(self.data_dict) + result = self.app.get(proxied_url, status='*') + assert result.status == 409, result.status + assert 'Invalid URL' in result.body, result.body + + + def test_non_existent_url(self): self.data_dict = set_resource_url('http://foo.bar') def f1(): From 624c91bc7212ad6437b6b578279fc3081ccd224c Mon Sep 17 00:00:00 2001 From: John Martin Date: Thu, 18 Jul 2013 13:52:40 +0100 Subject: [PATCH 21/88] [#518] Adds date-time tooltip to the activity time --- ckan/templates/snippets/activity_item.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/templates/snippets/activity_item.html b/ckan/templates/snippets/activity_item.html index 1923dee083c..b90d8621918 100644 --- a/ckan/templates/snippets/activity_item.html +++ b/ckan/templates/snippets/activity_item.html @@ -5,6 +5,6 @@

{{ h.literal(activity.msg.format(**activity.data)) }} - {{ h.time_ago_in_words_from_str(activity.timestamp, 'hour') }} ago + {{ h.time_ago_in_words_from_str(activity.timestamp, 'hour') }} ago

\ No newline at end of file From 486a09b1e56bbd7310be59ebd5d45e016dfb322c Mon Sep 17 00:00:00 2001 From: tobes Date: Thu, 18 Jul 2013 16:55:42 +0100 Subject: [PATCH 22/88] [#518] formatter now give x months ago etc --- ckan/lib/formatters.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/ckan/lib/formatters.py b/ckan/lib/formatters.py index 193580aa28f..dde93325f4e 100644 --- a/ckan/lib/formatters.py +++ b/ckan/lib/formatters.py @@ -80,6 +80,22 @@ def localised_nice_date(datetime_, show_date=False, with_hours=False): :rtype: sting ''' + + def months_between(date1, date2): + if date1 > date2: + date1, date2 = date2, date1 + m1 = date1.year * 12 + date1.month + m2 = date2.year * 12 + date2.month + months = m2 - m1 + if date1.day > date2.day: + months -= 1 + elif date1.day == date2.day: + seconds1 = date1.hour * 3600 + date1.minute + date1.second + seconds2 = date2.hour * 3600 + date2.minute + date2.second + if seconds1 > seconds2: + months -= 1 + return months + if not show_date: now = datetime.datetime.now() date_diff = now - datetime_ @@ -98,9 +114,16 @@ def localised_nice_date(datetime_, show_date=False, with_hours=False): return ungettext('{hours} hour ago', '{hours} hours ago', seconds / 3600).format(hours=seconds / 3600) # more than one day - if days < 31: + months = months_between(datetime_, now) + + if months < 1: return ungettext('{days} day ago', '{days} days ago', days).format(days=days) + if months < 13: + return ungettext('{months} month ago', '{months} months ago', + months).format(months=months) + return ungettext('over {years} year ago', 'over {years} years ago', + months / 12).format(years=months / 12) # actual date details = { 'min': datetime_.minute, From 194be7c9750feff5cc442feba8aeb0cfee6e4bd3 Mon Sep 17 00:00:00 2001 From: tobes Date: Thu, 18 Jul 2013 16:56:28 +0100 Subject: [PATCH 23/88] [#518] Refactor render_datetime --- ckan/lib/helpers.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 250fbf67e73..c5af21f767d 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -48,6 +48,28 @@ log = logging.getLogger(__name__) +def _datestamp_to_datetime(datetime_): + ''' Converts a datestamp to a datetime. If a datetime is provided it + just gets returned. + + :param datetime_: the timestamp + :type datetime_: string or datetime + + :rtype: datetime + ''' + if isinstance(datetime_, basestring): + try: + datetime_ = date_str_to_datetime(datetime_) + except TypeError: + return None + except ValueError: + return None + # check we are now a datetime + if not isinstance(datetime_, datetime.datetime): + return None + return datetime_ + + def redirect_to(*args, **kw): '''A routes.redirect_to wrapper to retain the i18n settings''' kw['__ckan_no_root'] = True @@ -793,15 +815,8 @@ def render_datetime(datetime_, date_format=None, with_hours=False): :rtype: string ''' - if isinstance(datetime_, basestring): - try: - datetime_ = date_str_to_datetime(datetime_) - except TypeError: - return '' - except ValueError: - return '' - # check we are now a datetime - if not isinstance(datetime_, datetime.datetime): + datetime_ = _datestamp_to_datetime(datetime_) + if not datetime_: return '' # if date_format was supplied we use it if date_format: From 003a2119010130b3fff2cffca97cb1d738ebd3e0 Mon Sep 17 00:00:00 2001 From: tobes Date: Thu, 18 Jul 2013 16:57:02 +0100 Subject: [PATCH 24/88] [#518] Add time_ago_from_datestamp() helper --- ckan/lib/helpers.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index c5af21f767d..8453a131adc 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -934,6 +934,21 @@ def time_ago_in_words_from_str(date_str, granularity='month'): return _('Unknown') +def time_ago_from_timestamp(timestamp): + ''' Returns a string like `5 months ago` for a datetime relative to now + :param timestamp: the timestamp or datetime + :type timestamp: string or datetime + + :rtype: string + ''' + datetime_ = _datestamp_to_datetime(timestamp) + if not datetime_: + return _('Unknown') + + # the localised date + return formatters.localised_nice_date(datetime_, show_date=False) + + def button_attr(enable, type='primary'): if enable: return 'class="btn %s"' % type @@ -1696,6 +1711,7 @@ def new_activities(): 'localised_filesize', 'list_dict_filter', 'new_activities', + 'time_ago_from_timestamp', # imported into ckan.lib.helpers 'literal', 'link_to', From 7792e2e6de61d9398f202d68de37723143bca7f2 Mon Sep 17 00:00:00 2001 From: tobes Date: Thu, 18 Jul 2013 16:58:00 +0100 Subject: [PATCH 25/88] [#518] Deprecate time_ago_in_words_from_str() helper --- ckan/lib/helpers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 8453a131adc..3403ce7e58c 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -925,8 +925,11 @@ def dst(self, dt): def tzname(self, dt): return None - +@maintain.deprecated('h.time_ago_in_words_from_str is deprecated in 2.2 ' + 'and will be removed. Please use ' + 'h.time_ago_from_timestamp instead') def time_ago_in_words_from_str(date_str, granularity='month'): + '''Deprecated in 2.2 use time_ago_from_timestamp''' if date_str: return date.time_ago_in_words(date_str_to_datetime(date_str), granularity=granularity) From b67615d423142b5f7e45b677ad3811a8d668b257 Mon Sep 17 00:00:00 2001 From: tobes Date: Thu, 18 Jul 2013 16:58:25 +0100 Subject: [PATCH 26/88] [#518] Fixes for activity stream item template --- ckan/templates/snippets/activity_item.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/templates/snippets/activity_item.html b/ckan/templates/snippets/activity_item.html index b90d8621918..e2bf699a7f4 100644 --- a/ckan/templates/snippets/activity_item.html +++ b/ckan/templates/snippets/activity_item.html @@ -5,6 +5,6 @@

{{ h.literal(activity.msg.format(**activity.data)) }} - {{ h.time_ago_in_words_from_str(activity.timestamp, 'hour') }} ago + {{ h.time_ago_from_timestamp(activity.timestamp) }}

- \ No newline at end of file + From 14458a9823fb03ec5e3eeca70040f09cad0fb090 Mon Sep 17 00:00:00 2001 From: Vitor Baptista Date: Mon, 29 Jul 2013 17:51:13 -0300 Subject: [PATCH 27/88] [#1141] Fix bug where we weren't raising Exception when we couldn't find plugin --- ckan/plugins/core.py | 16 +++++++--------- ckan/tests/test_plugins.py | 4 ++++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/ckan/plugins/core.py b/ckan/plugins/core.py index 62290d38503..8f3752117ea 100644 --- a/ckan/plugins/core.py +++ b/ckan/plugins/core.py @@ -237,15 +237,13 @@ def _get_service(plugin_name): if isinstance(plugin_name, basestring): for group in GROUPS: - try: - (plugin,) = iter_entry_points( - group=group, - name=plugin_name - ) + iterator = iter_entry_points( + group=group, + name=plugin_name + ) + plugin = next(iterator, None) + if plugin: return plugin.load()(name=plugin_name) - except ValueError: - pass - else: - raise PluginNotFoundException(plugin_name) + raise PluginNotFoundException(plugin_name) else: raise TypeError('Expected a plugin name', plugin_name) diff --git a/ckan/tests/test_plugins.py b/ckan/tests/test_plugins.py index b18810cf213..f8a5d9929b0 100644 --- a/ckan/tests/test_plugins.py +++ b/ckan/tests/test_plugins.py @@ -164,3 +164,7 @@ def test_auth_plugin_override(self): with plugins.use_plugin('auth_plugin'): assert new_authz.is_authorized('package_list', {}) != package_list_original assert new_authz.is_authorized('package_list', {}) == package_list_original + + @raises(plugins.PluginNotFoundException) + def test_inexistent_plugin_loading(self): + plugins.load('inexistent-plugin') From ad172b52de7116f95e226e4073de105a9e988ef5 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 31 Jul 2013 15:57:12 +0200 Subject: [PATCH 28/88] [#981] Add auth function to datapusher command --- ckanext/datastore/logic/action.py | 3 ++- ckanext/datastore/logic/auth.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index a29dbb3128b..9a27f37f396 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -422,7 +422,8 @@ def datapusher_submit(context, data_dict): to the :ref:`datastore dump ` URL after the data has been imported. :type set_url_to_dump: boolean ''' - pass + + p.toolkit.check_access('datapusher_submit', context, data_dict) def _resource_exists(context, data_dict): diff --git a/ckanext/datastore/logic/auth.py b/ckanext/datastore/logic/auth.py index 641eb5cdddf..4d044755035 100644 --- a/ckanext/datastore/logic/auth.py +++ b/ckanext/datastore/logic/auth.py @@ -34,5 +34,9 @@ def datastore_search(context, data_dict): return _datastore_auth(context, data_dict, 'resource_show') +def datapusher_submit(context, data_dict): + return _datastore_auth(context, data_dict) + + def datastore_change_permissions(context, data_dict): return _datastore_auth(context, data_dict) From ecd9240745ab16a9c2e95be19f7e8be9092edbf3 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 31 Jul 2013 17:54:57 +0200 Subject: [PATCH 29/88] [#981] Use resource instead of package_id --- ckanext/datastore/logic/action.py | 54 +++++++++++++++++--------- ckanext/datastore/logic/schema.py | 1 - ckanext/datastore/tests/test_create.py | 6 +-- 3 files changed, 38 insertions(+), 23 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index a1aff2b4e1b..5db32eedca1 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -26,16 +26,20 @@ def datastore_create(context, data_dict): times to initially insert more data, add fields, change the aliases or indexes as well as the primary keys. - To create a datastore resource and a CKAN resource at the same time, - provide a valid ``package_id`` and omit the ``resource_id``. + To create an empty datastore resource and a CKAN resource at the same time, + provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``. + + If you want to create a datastore resource from the content of a file, + provide ``resource`` with a valid ``url``. See :ref:`fields` and :ref:`records` for details on how to lay out records. :param resource_id: resource id that the data is going to be stored against. :type resource_id: string - :param package_id: package in which a new resource will be crated. - use instead of ``resource_id``. - :type package_id: string + :param resource: resource dictionary that is passed to + :meth:`~ckan.logic.action.create.resource_create`. + Use instead of ``resource_id`` (optional) + :type resource: dictionary :param aliases: names for read only aliases of the resource. (optional) :type aliases: list or comma separated string :param fields: fields/columns and their extra metadata. (optional) @@ -60,36 +64,48 @@ def datastore_create(context, data_dict): ''' schema = context.get('schema', dsschema.datastore_create_schema()) records = data_dict.pop('records', None) + resource = data_dict.pop('resource', None) data_dict, errors = _validate(data_dict, schema, context) if records: data_dict['records'] = records + if resource: + data_dict['resource'] = resource if errors: raise p.toolkit.ValidationError(errors) p.toolkit.check_access('datastore_create', context, data_dict) - if 'package_id' in data_dict and 'resource_id' in data_dict: + if 'resource' in data_dict and 'resource_id' in data_dict: raise p.toolkit.ValidationError({ - 'resource_id': ['package_id cannot be used with resource_id'] + 'resource': ['resource cannot be used with resource_id'] }) - if not 'package_id' in data_dict and not 'resource_id' in data_dict: + if not 'resource' in data_dict and not 'resource_id' in data_dict: raise p.toolkit.ValidationError({ - 'resource_id': ['resource_id or package_id required'] + 'resource_id': ['resource_id or resource required'] }) - # create ckan resource if package_id was provided - if 'package_id' in data_dict: - res = p.toolkit.get_action('resource_create')(context, { - 'package_id': data_dict['package_id'], - 'url': '_tmp' - }) - res['url'] = h.url_for( - controller='ckanext.datastore.controller:DatastoreController', - action='dump', resource_id=res['id']) - p.toolkit.get_action('resource_update')(context, res) + if 'resource' in data_dict: + has_url = 'url' in data_dict['resource'] + data_dict['resource'].setdefault('url', '_tmp') + res = p.toolkit.get_action('resource_create')(context, + data_dict['resource']) data_dict['resource_id'] = res['id'] + # create resource from file + if has_url: + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': res['id'], + 'set_url_to_dump': True + }) + # create empty resource + # have to set the url here because we need the resource id + else: + res['url'] = h.url_for( + controller='ckanext.datastore.controller:DatastoreController', + action='dump', resource_id=res['id']) + p.toolkit.get_action('resource_update')(context, res) + data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] # validate aliases diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index 824f7cbf1fd..018eb249d79 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -68,7 +68,6 @@ def json_validator(value, context): def datastore_create_schema(): schema = { 'resource_id': [ignore_missing, unicode, resource_id_exists], - 'package_id': [ignore_missing, unicode, package_id_exists], 'id': [ignore_missing], 'aliases': [ignore_missing, list_of_strings_or_string], 'fields': { diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 9387183acbd..93dda0f6cfc 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -524,7 +524,7 @@ def test_create_basic(self): def test_create_ckan_resource_in_package(self): package = model.Package.get('annakarenina') data = { - 'package_id': package.id + 'resource': {'package_id': package.id} } postparams = '%s=1' % json.dumps(data) auth = {'Authorization': str(self.sysadmin_user.apikey)} @@ -540,12 +540,12 @@ def test_create_ckan_resource_in_package(self): # disabled until #547 fixes problems with the plugins in tests #assert res['url'] == '/datastore/dump/' + res['id'], res - def test_cant_provide_resource_and_package_id(self): + def test_cant_provide_resource_and_resource_id(self): package = model.Package.get('annakarenina') resource = package.resources[0] data = { 'resource_id': resource.id, - 'package_id': package.id + 'resource': {'package_id': package.id} } postparams = '%s=1' % json.dumps(data) auth = {'Authorization': str(self.sysadmin_user.apikey)} From ade27e4f20cc8881f26cc9492ebfa731fe049cc3 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 31 Jul 2013 18:49:21 +0200 Subject: [PATCH 30/88] [#981] Require latest httpretty --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 41611c81aec..055eb2669c5 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # These are packages that required when running ckan tests -e git+https://github.com/okfn/ckanclient@a315a72eef74dda4831acd022ef84a1246803c73#egg=ckanclient-dev docutils==0.8.1 -httpretty==0.6.0 +httpretty==0.6.2 nose==1.2.1 pep8==1.4.5 pip-tools==0.3.1 From ad19d847702fe1268b7449166f06f84e9ef24ae6 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 31 Jul 2013 18:49:52 +0200 Subject: [PATCH 31/88] [#981] Test whether we properly call the datapusher --- ckanext/datastore/logic/action.py | 7 ++++++- ckanext/datastore/tests/test_create.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 5db32eedca1..495ffeadb1a 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -1,6 +1,8 @@ import logging -import pylons +import json +import pylons +import requests import sqlalchemy import ckan.lib.navl.dictization_functions @@ -441,6 +443,9 @@ def datapusher_submit(context, data_dict): ''' p.toolkit.check_access('datapusher_submit', context, data_dict) + requests.post('http://datapusher.ckan.org/job', data=json.dumps({ + 'foo': 'bar' + })) def _resource_exists(context, data_dict): diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 93dda0f6cfc..a1d3c7f90e6 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -1,4 +1,5 @@ import json +import httpretty import nose from nose.tools import assert_equal @@ -540,6 +541,25 @@ def test_create_ckan_resource_in_package(self): # disabled until #547 fixes problems with the plugins in tests #assert res['url'] == '/datastore/dump/' + res['id'], res + @httpretty.activate + def test_providing_res_with_url_calls_datapusher_correctly(self): + httpretty.HTTPretty.register_uri(httpretty.HTTPretty.POST, + 'http://datapusher.ckan.org/job', + content_type='application/json', + body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) + + package = model.Package.get('annakarenina') + data = { + 'resource': {'package_id': package.id, 'url': 'demo.ckan.org'} + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + self.app.post('/api/action/datastore_create', params=postparams, + extra_environ=auth, status=200) + + body = json.loads(httpretty.last_request().body) + assert body == {'foo': 'bar'}, body + def test_cant_provide_resource_and_resource_id(self): package = model.Package.get('annakarenina') resource = package.resources[0] From 10efe1c43062db8c102bb6c0d21baff3abaacf20 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 1 Aug 2013 09:42:05 +0200 Subject: [PATCH 32/88] [#981] Call datapusher according to API --- ckanext/datastore/logic/action.py | 18 ++++++++++++++++-- ckanext/datastore/tests/test_create.py | 9 ++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 495ffeadb1a..64c6758be35 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -1,5 +1,6 @@ import logging import json +import urlparse import pylons import requests @@ -442,9 +443,22 @@ def datapusher_submit(context, data_dict): :type set_url_to_dump: boolean ''' + # ToDo: add task + # ToDo: handle set_url_to_dump + p.toolkit.check_access('datapusher_submit', context, data_dict) - requests.post('http://datapusher.ckan.org/job', data=json.dumps({ - 'foo': 'bar' + + datapusher_url = pylons.config.get( + 'datapusher.url', 'http://datapusher.ckan.org/') + + user = p.toolkit.get_action('user_show')(context, {'id': context['user']}) + requests.post(urlparse.urljoin(datapusher_url, 'job'), data=json.dumps({ + 'api_key': user['apikey'], + 'job_type': 'push_to_datastore', + 'metadata': { + 'ckan_url': pylons.config['ckan.site_url'], + 'resource_id': data_dict['resource_id'] + } })) diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index a1d3c7f90e6..6d0b819c62c 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -543,7 +543,8 @@ def test_create_ckan_resource_in_package(self): @httpretty.activate def test_providing_res_with_url_calls_datapusher_correctly(self): - httpretty.HTTPretty.register_uri(httpretty.HTTPretty.POST, + httpretty.HTTPretty.register_uri( + httpretty.HTTPretty.POST, 'http://datapusher.ckan.org/job', content_type='application/json', body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) @@ -557,8 +558,10 @@ def test_providing_res_with_url_calls_datapusher_correctly(self): self.app.post('/api/action/datastore_create', params=postparams, extra_environ=auth, status=200) - body = json.loads(httpretty.last_request().body) - assert body == {'foo': 'bar'}, body + assert len(package.resources) == 4, len(package.resources) + resource = package.resources[3] + data = json.loads(httpretty.last_request().body) + assert data['metadata']['resource_id'] == resource.id, data def test_cant_provide_resource_and_resource_id(self): package = model.Package.get('annakarenina') From cd23a461c95cd80d1b47aa5b4fc426e8e01065ef Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 1 Aug 2013 11:34:48 +0200 Subject: [PATCH 33/88] [#981] Add hack to avoid hanging tests in python 2.6 --- ckanext/datastore/tests/test_create.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 6d0b819c62c..ad0b0dabafe 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -1,6 +1,7 @@ import json import httpretty import nose +import sys from nose.tools import assert_equal import pylons @@ -15,6 +16,12 @@ from ckanext.datastore.tests.helpers import rebuild_all_dbs +# avoid hanging tests https://github.com/gabrielfalcao/HTTPretty/issues/34 +if sys.version_info < (2, 7, 0): + import socket + socket.setdefaulttimeout(1) + + class TestDatastoreCreate(tests.WsgiAppCase): sysadmin_user = None normal_user = None From 955e42276e71ab072c27c2fc5cbf3e4c9aee1194 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 1 Aug 2013 12:08:12 +0200 Subject: [PATCH 34/88] [#981] Fix test to use ckan app by @tobes --- ckanext/datastore/logic/action.py | 3 +-- ckanext/datastore/tests/test_create.py | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 64c6758be35..010f805369c 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -9,7 +9,6 @@ import ckan.lib.navl.dictization_functions import ckan.logic as logic import ckan.plugins as p -import ckan.lib.helpers as h import ckanext.datastore.db as db import ckanext.datastore.logic.schema as dsschema @@ -104,7 +103,7 @@ def datastore_create(context, data_dict): # create empty resource # have to set the url here because we need the resource id else: - res['url'] = h.url_for( + res['url'] = p.toolkit.url_for( controller='ckanext.datastore.controller:DatastoreController', action='dump', resource_id=res['id']) p.toolkit.get_action('resource_update')(context, res) diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index ad0b0dabafe..b34624892b3 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -5,12 +5,15 @@ from nose.tools import assert_equal import pylons +from pylons import config import sqlalchemy.orm as orm +import paste.fixture import ckan.plugins as p import ckan.lib.create_test_data as ctd import ckan.model as model import ckan.tests as tests +from ckan.config.middleware import make_app import ckanext.datastore.db as db from ckanext.datastore.tests.helpers import rebuild_all_dbs @@ -28,6 +31,9 @@ class TestDatastoreCreate(tests.WsgiAppCase): @classmethod def setup_class(cls): + + wsgiapp = make_app(config['global_conf'], **config) + cls.app = paste.fixture.TestApp(wsgiapp) if not tests.is_datastore_supported(): raise nose.SkipTest("Datastore not supported") p.load('datastore') @@ -546,7 +552,7 @@ def test_create_ckan_resource_in_package(self): res = tests.call_action_api( self.app, 'resource_show', id=res_dict['result']['resource_id']) # disabled until #547 fixes problems with the plugins in tests - #assert res['url'] == '/datastore/dump/' + res['id'], res + assert res['url'] == '/datastore/dump/' + res['id'], res @httpretty.activate def test_providing_res_with_url_calls_datapusher_correctly(self): From 9d132f4481afd238efc28019b45e481cb7607e8b Mon Sep 17 00:00:00 2001 From: John Martin Date: Thu, 1 Aug 2013 14:46:33 +0100 Subject: [PATCH 35/88] [#1095] Changed 'title' to 'name' in group and org form templates --- ckan/templates/group/snippets/group_form.html | 2 +- ckan/templates/organization/snippets/organization_form.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/templates/group/snippets/group_form.html b/ckan/templates/group/snippets/group_form.html index f8b45f2f3d9..6d4d3216b26 100644 --- a/ckan/templates/group/snippets/group_form.html +++ b/ckan/templates/group/snippets/group_form.html @@ -7,7 +7,7 @@ {% block basic_fields %} {% set attrs = {'data-module': 'slug-preview-target'} %} - {{ form.input('title', label=_('Title'), id='field-title', placeholder=_('My Group'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} + {{ form.input('title', label=_('Name'), id='field-name', placeholder=_('My Group'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} {# Perhaps these should be moved into the controller? #} {% set prefix = h.url_for(controller='group', action='read', id='') %} diff --git a/ckan/templates/organization/snippets/organization_form.html b/ckan/templates/organization/snippets/organization_form.html index 70eb35de227..7b05e7d1542 100644 --- a/ckan/templates/organization/snippets/organization_form.html +++ b/ckan/templates/organization/snippets/organization_form.html @@ -7,7 +7,7 @@ {% block basic_fields %} {% set attrs = {'data-module': 'slug-preview-target'} %} - {{ form.input('title', label=_('Title'), id='field-title', placeholder=_('My Organization'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} + {{ form.input('title', label=_('Name'), id='field-name', placeholder=_('My Organization'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} {# Perhaps these should be moved into the controller? #} {% set prefix = h.url_for(controller='organization', action='read', id='') %} From c5ee465fdcb058b07c2b277de639eb811dbcd2a3 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 5 Aug 2013 10:53:19 +0200 Subject: [PATCH 36/88] [#981] Change import, tab -> spaces --- ckanext/datastore/plugin.py | 2 +- ckanext/datastore/tests/test_create.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 8dc03571956..e9c1d9781d4 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -224,7 +224,7 @@ def get_actions(self): 'datastore_upsert': action.datastore_upsert, 'datastore_delete': action.datastore_delete, 'datastore_search': action.datastore_search, - 'datapusher_submit': action.datapusher_submit, + 'datapusher_submit': action.datapusher_submit, 'resource_show': self.resource_show_action, } if not self.legacy_mode: diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index b34624892b3..157d8a0633e 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -13,7 +13,7 @@ import ckan.lib.create_test_data as ctd import ckan.model as model import ckan.tests as tests -from ckan.config.middleware import make_app +import ckan.config.middleware as middleware import ckanext.datastore.db as db from ckanext.datastore.tests.helpers import rebuild_all_dbs @@ -32,7 +32,7 @@ class TestDatastoreCreate(tests.WsgiAppCase): @classmethod def setup_class(cls): - wsgiapp = make_app(config['global_conf'], **config) + wsgiapp = middleware.make_app(config['global_conf'], **config) cls.app = paste.fixture.TestApp(wsgiapp) if not tests.is_datastore_supported(): raise nose.SkipTest("Datastore not supported") From 1b20a14e0a7c3a2ef7f3ec78157b20588eb194f7 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 7 Aug 2013 13:34:47 +0200 Subject: [PATCH 37/88] [#938] Save the status of the datapusher task --- ckanext/datastore/logic/action.py | 17 +++++++++++++++- ckanext/datastore/tests/test_create.py | 28 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 010f805369c..4588739ff7c 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -1,6 +1,7 @@ import logging import json import urlparse +import datetime import pylons import requests @@ -442,6 +443,10 @@ def datapusher_submit(context, data_dict): :type set_url_to_dump: boolean ''' + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] + res_id = _get_or_bust(data_dict, 'resource_id') + # ToDo: add task # ToDo: handle set_url_to_dump @@ -456,10 +461,20 @@ def datapusher_submit(context, data_dict): 'job_type': 'push_to_datastore', 'metadata': { 'ckan_url': pylons.config['ckan.site_url'], - 'resource_id': data_dict['resource_id'] + 'resource_id': res_id } })) + p.toolkit.get_action('task_status_update')(context, { + 'entity_id': res_id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'key': 'datapusher', + 'value': datapusher_url, + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending', + }) + def _resource_exists(context, data_dict): # Returns true if the resource exists in CKAN and in the datastore diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 157d8a0633e..6e7bf9e65c9 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -591,6 +591,34 @@ def test_cant_provide_resource_and_resource_id(self): assert res_dict['error']['__type'] == 'Validation Error' + @httpretty.activate + def test_send_datapusher_creates_task(self): + httpretty.HTTPretty.register_uri( + httpretty.HTTPretty.POST, + 'http://datapusher.ckan.org/job', + content_type='application/json', + body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) + + package = model.Package.get('annakarenina') + resource = package.resources[0] + + context = { + 'ignore_auth': True, + 'user': self.sysadmin_user.name + } + + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': resource.id + }) + + task = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': resource.id, + 'task_type': 'datapusher', + 'key': 'datapusher' + }) + + assert task['state'] == 'pending', task + def test_guess_types(self): resource = model.Package.get('annakarenina').resources[1] From 25338eae046a22b93e2c68e238f3fc6649ab831a Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 7 Aug 2013 13:44:35 +0200 Subject: [PATCH 38/88] [#938] Don't push jobs to datapusher if datapusher url is not set --- ckanext/datastore/logic/action.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 4588739ff7c..7d02aad482f 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -441,6 +441,11 @@ def datapusher_submit(context, data_dict): :param set_url_to_dump: If set to true, the URL of the resource will be set to the :ref:`datastore dump ` URL after the data has been imported. :type set_url_to_dump: boolean + + Returns ``True`` if the job has been submitted and ``False`` if the job + has not been submitted, i.e. when the datapusher is not configured. + + :rtype: boolean ''' if 'id' in data_dict: @@ -455,6 +460,10 @@ def datapusher_submit(context, data_dict): datapusher_url = pylons.config.get( 'datapusher.url', 'http://datapusher.ckan.org/') + # no datapusher url means the datapusher should not be used + if not datapusher_url: + return False + user = p.toolkit.get_action('user_show')(context, {'id': context['user']}) requests.post(urlparse.urljoin(datapusher_url, 'job'), data=json.dumps({ 'api_key': user['apikey'], @@ -475,6 +484,8 @@ def datapusher_submit(context, data_dict): 'state': 'pending', }) + return True + def _resource_exists(context, data_dict): # Returns true if the resource exists in CKAN and in the datastore From 41174250dd4fcffbe20a58736c9ca2a97d2d23bd Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 13:24:36 +0200 Subject: [PATCH 39/88] [#932] Use the url_type to indicate when a resource is a pure datastore resource --- ckan/logic/action/get.py | 7 ++++++- ckan/plugins/interfaces.py | 15 +++++++++++++++ ckanext/datastore/logic/action.py | 6 ++---- ckanext/datastore/plugin.py | 11 +++++++++++ 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 05350a0b061..cb1c8c0b373 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -794,7 +794,12 @@ def resource_show(context, data_dict): raise NotFound _check_access('resource_show', context, data_dict) - return model_dictize.resource_dictize(resource, context) + resource_dict = model_dictize.resource_dictize(resource, context) + + for item in plugins.PluginImplementations(plugins.IResourceController): + resource_dict = item.before_show(resource_dict) + + return resource_dict def resource_status_show(context, data_dict): '''Return the statuses of a resource's tasks. diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 8acfc34aad6..de375b54ae2 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -14,6 +14,7 @@ 'IConfigurable', 'IConfigurer', 'IActions', 'IResourceUrlChange', 'IDatasetForm', 'IResourcePreview', + 'IResourceController', 'IGroupForm', 'ITagController', 'ITemplateHelpers', @@ -432,6 +433,20 @@ def before_view(self, pkg_dict): return pkg_dict +class IResourceController(Interface): + """ + Hook into the resource controller. + (see IGroupController) + """ + + def before_show(self, resource_dict): + ''' + Extensions will receive the validated data dict before the resource + is ready for display. + ''' + return resource_dict + + class IPluginObserver(Interface): """ Plugin to the plugin loading mechanism diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 7d02aad482f..fa7c86e5ffc 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -102,11 +102,9 @@ def datastore_create(context, data_dict): 'set_url_to_dump': True }) # create empty resource - # have to set the url here because we need the resource id else: - res['url'] = p.toolkit.url_for( - controller='ckanext.datastore.controller:DatastoreController', - action='dump', resource_id=res['id']) + # no need to set the full url because it will be set in before_show + res['url_type'] = 'datastore' p.toolkit.get_action('resource_update')(context, res) data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index e9c1d9781d4..c70c935c279 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -22,6 +22,7 @@ class DatastorePlugin(p.SingletonPlugin): p.implements(p.IAuthFunctions) p.implements(p.IDomainObjectModification, inherit=True) p.implements(p.IRoutes, inherit=True) + p.implements(p.IResourceController, inherit=True) legacy_mode = False resource_show_action = None @@ -246,3 +247,13 @@ def before_map(self, m): controller='ckanext.datastore.controller:DatastoreController', action='dump') return m + + def before_show(self, resource_dict): + ''' Modify the resource url of datastore resources so that + they link to the datastore dumps. + ''' + if resource_dict['url_type'] == 'datastore': + resource_dict['url'] = p.toolkit.url_for( + controller='ckanext.datastore.controller:DatastoreController', + action='dump', resource_id=resource_dict['id']) + return resource_dict From af012e5352c0fa7cbe7b1b71454242d7376de9fa Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 17:04:32 +0200 Subject: [PATCH 40/88] [#981] Declare auth function --- ckanext/datastore/plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index c70c935c279..452347a5fd7 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -240,7 +240,8 @@ def get_auth_functions(self): 'datastore_upsert': auth.datastore_upsert, 'datastore_delete': auth.datastore_delete, 'datastore_search': auth.datastore_search, - 'datastore_change_permissions': auth.datastore_change_permissions} + 'datastore_change_permissions': auth.datastore_change_permissions, + 'datapusher_submit': auth.datapusher_submit} def before_map(self, m): m.connect('/datastore/dump/{resource_id}', From 80496913d4cddaeef940d749ee51f1afa04276a5 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 17:05:43 +0200 Subject: [PATCH 41/88] [#940] Add datapusher callback hook --- ckanext/datastore/logic/action.py | 25 ++++++++++++++++ ckanext/datastore/plugin.py | 1 + ckanext/datastore/tests/test_create.py | 40 ++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index fa7c86e5ffc..56384a30243 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -485,6 +485,31 @@ def datapusher_submit(context, data_dict): return True +def datapusher_hook(context, data_dict): + """ Update datapusher task. This action is typically called by the + datapusher whenever the status of a job changes. + + Expects a job with ``status``, ``metadata``. + """ + + # TODO: use a schema to validate + + p.toolkit.check_access('datapusher_submit', context, data_dict) + + res_id = data_dict['metadata']['resource_id'] + + task = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': res_id, + 'task_type': 'datapusher', + 'key': 'datapusher' + }) + + task['state'] = data_dict['status'] + task['last_updated'] = str(datetime.datetime.now()) + + p.toolkit.get_action('task_status_update')(context, task) + + def _resource_exists(context, data_dict): # Returns true if the resource exists in CKAN and in the datastore model = _get_or_bust(context, 'model') diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 452347a5fd7..279f59b8b6f 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -226,6 +226,7 @@ def get_actions(self): 'datastore_delete': action.datastore_delete, 'datastore_search': action.datastore_search, 'datapusher_submit': action.datapusher_submit, + 'datapusher_hook': action.datapusher_hook, 'resource_show': self.resource_show_action, } if not self.legacy_mode: diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 6e7bf9e65c9..91249e3ac04 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -2,6 +2,7 @@ import httpretty import nose import sys +import datetime from nose.tools import assert_equal import pylons @@ -619,6 +620,45 @@ def test_send_datapusher_creates_task(self): assert task['state'] == 'pending', task + def test_datapusher_hook(self): + package = model.Package.get('annakarenina') + resource = package.resources[0] + + context = { + 'user': self.sysadmin_user.name + } + + p.toolkit.get_action('task_status_update')(context, { + 'entity_id': resource.id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'key': 'datapusher', + 'value': True, + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending' + }) + + data = { + 'status': 'success', + 'metadata': { + 'resource_id': resource.id + } + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datapusher_hook', params=postparams, + extra_environ=auth, status=200) + print res.body + res_dict = json.loads(res.body) + + assert res_dict['success'] is True + + task = tests.call_action_api( + self.app, 'task_status_show', entity_id=resource.id, + task_type='datapusher', key='datapusher') + + assert task['state'] == 'success', task + def test_guess_types(self): resource = model.Package.get('annakarenina').resources[1] From bb2e05f8b7091c0a5f7e7c97c9e98c2af7c584bd Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 17:04:32 +0200 Subject: [PATCH 42/88] [#981] Declare auth function --- ckanext/datastore/plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index c70c935c279..452347a5fd7 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -240,7 +240,8 @@ def get_auth_functions(self): 'datastore_upsert': auth.datastore_upsert, 'datastore_delete': auth.datastore_delete, 'datastore_search': auth.datastore_search, - 'datastore_change_permissions': auth.datastore_change_permissions} + 'datastore_change_permissions': auth.datastore_change_permissions, + 'datapusher_submit': auth.datapusher_submit} def before_map(self, m): m.connect('/datastore/dump/{resource_id}', From 8dde66ed7dc5ab6a9314407239384a5c92af4c4f Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 17:07:30 +0200 Subject: [PATCH 43/88] [#981] Clean up --- ckanext/datastore/logic/action.py | 5 ++--- ckanext/datastore/tests/test_create.py | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index fa7c86e5ffc..24095ce832f 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -450,8 +450,7 @@ def datapusher_submit(context, data_dict): data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') - # ToDo: add task - # ToDo: handle set_url_to_dump + # TODO: handle set_url_to_dump p.toolkit.check_access('datapusher_submit', context, data_dict) @@ -479,7 +478,7 @@ def datapusher_submit(context, data_dict): 'key': 'datapusher', 'value': datapusher_url, 'last_updated': str(datetime.datetime.now()), - 'state': 'pending', + 'state': 'pending' }) return True diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 6e7bf9e65c9..d014d0ee6aa 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -551,7 +551,6 @@ def test_create_ckan_resource_in_package(self): res = tests.call_action_api( self.app, 'resource_show', id=res_dict['result']['resource_id']) - # disabled until #547 fixes problems with the plugins in tests assert res['url'] == '/datastore/dump/' + res['id'], res @httpretty.activate From b7f21ed09dadfb27d4535926f5e358bb760ce318 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 18:45:47 +0200 Subject: [PATCH 44/88] [#940] Set the correct datapusher callback url --- ckanext/datastore/logic/action.py | 5 +++++ ckanext/datastore/tests/test_create.py | 13 ++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 56384a30243..c64523b83ca 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -462,10 +462,15 @@ def datapusher_submit(context, data_dict): if not datapusher_url: return False + callback_url = p.toolkit.url_for( + controller='api', action='action', logic_function='datapusher_hook', + ver=3, qualified=True) + user = p.toolkit.get_action('user_show')(context, {'id': context['user']}) requests.post(urlparse.urljoin(datapusher_url, 'job'), data=json.dumps({ 'api_key': user['apikey'], 'job_type': 'push_to_datastore', + 'result_url': callback_url, 'metadata': { 'ckan_url': pylons.config['ckan.site_url'], 'resource_id': res_id diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 91249e3ac04..d97ea4f24e1 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -564,18 +564,17 @@ def test_providing_res_with_url_calls_datapusher_correctly(self): body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) package = model.Package.get('annakarenina') - data = { - 'resource': {'package_id': package.id, 'url': 'demo.ckan.org'} - } - postparams = '%s=1' % json.dumps(data) - auth = {'Authorization': str(self.sysadmin_user.apikey)} - self.app.post('/api/action/datastore_create', params=postparams, - extra_environ=auth, status=200) + + tests.call_action_api( + self.app, 'datastore_create', apikey=self.sysadmin_user.apikey, + resource=dict(package_id=package.id, url='demo.ckan.org')) assert len(package.resources) == 4, len(package.resources) resource = package.resources[3] data = json.loads(httpretty.last_request().body) assert data['metadata']['resource_id'] == resource.id, data + assert data['result_url'].endswith('/action/datapusher_hook'), data + assert data['result_url'].startswith('http://'), data def test_cant_provide_resource_and_resource_id(self): package = model.Package.get('annakarenina') From 3c154dd1940dab74a1990c4d4bc5dad0f9130c2a Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 8 Aug 2013 19:41:20 +0200 Subject: [PATCH 45/88] Typo --- ckanext/datastore/logic/action.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index fc2954078f2..03fdbbd3b58 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -384,7 +384,7 @@ def datastore_make_private(context, data_dict): private or a new DataStore table is created for a CKAN resource that belongs to a private dataset. - :param resource_id: if of resource that should become private + :param resource_id: id of resource that should become private :type resource_id: string ''' if 'id' in data_dict: From 3a36088468900905f0d8616f19bd0163a6a93a6e Mon Sep 17 00:00:00 2001 From: Vitor Baptista Date: Fri, 9 Aug 2013 11:41:33 -0300 Subject: [PATCH 46/88] Move logic of getting ActivityDetails' by activity_id to its model --- ckan/logic/action/get.py | 3 +-- ckan/model/activity.py | 6 ++++++ ckan/tests/models/test_activity.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 ckan/tests/models/test_activity.py diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index b87e3acc795..7e5c5a07d4a 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -2124,8 +2124,7 @@ def activity_detail_list(context, data_dict): # authorized to read. model = context['model'] activity_id = _get_or_bust(data_dict, 'id') - activity_detail_objects = model.Session.query( - model.activity.ActivityDetail).filter_by(activity_id=activity_id).all() + activity_detail_objects = model.ActivityDetail.by_activity_id(activity_id) return model_dictize.activity_detail_list_dictize(activity_detail_objects, context) diff --git a/ckan/model/activity.py b/ckan/model/activity.py index 5ab31a27514..04edaf6819c 100644 --- a/ckan/model/activity.py +++ b/ckan/model/activity.py @@ -2,6 +2,7 @@ from sqlalchemy import orm, types, Column, Table, ForeignKey, desc, or_ +import ckan.model import meta import types as _types import domain_object @@ -62,6 +63,11 @@ def __init__(self, activity_id, object_id, object_type, activity_type, else: self.data = data + @classmethod + def by_activity_id(cls, activity_id): + return ckan.model.Session.query(cls) \ + .filter_by(activity_id = activity_id).all() + meta.mapper(ActivityDetail, activity_detail_table, properties = { 'activity':orm.relation ( Activity, backref=orm.backref('activity_detail')) diff --git a/ckan/tests/models/test_activity.py b/ckan/tests/models/test_activity.py new file mode 100644 index 00000000000..2aa020987fb --- /dev/null +++ b/ckan/tests/models/test_activity.py @@ -0,0 +1,16 @@ +import ckan.model as model + +Activity = model.Activity +ActivityDetail = model.ActivityDetail + + +class TestActivityDetail(object): + def test_by_activity_id(self): + activity = Activity('user-id', 'object-id', + 'revision-id', 'activity-type') + activity.save() + activity_detail = ActivityDetail(activity.id, 'object-id', + 'object-type', 'activity-type') + activity_detail.save() + activities = ActivityDetail.by_activity_id(activity.id) + assert activities == [activity_detail], activity_detail From cab2d8765bf703ee16b2abadbda8c8cfcd93ca3c Mon Sep 17 00:00:00 2001 From: joetsoi Date: Sun, 11 Aug 2013 16:06:14 +0100 Subject: [PATCH 47/88] [#1179] add offset/limit to package_list action --- ckan/logic/action/get.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index c0fe9639764..6e1955ef4a6 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -72,6 +72,12 @@ def package_list(context, data_dict): _check_access('package_list', context, data_dict) + schema = context.get('schema', logic.schema.default_pagination_schema()) + data_dict, errors = _validate(data_dict, schema, context) + if errors: + raise ValidationError(errors) + + package_revision_table = model.package_revision_table col = (package_revision_table.c.id if api == 2 else package_revision_table.c.name) @@ -79,6 +85,14 @@ def package_list(context, data_dict): query = query.where(_and_(package_revision_table.c.state=='active', package_revision_table.c.current==True)) query = query.order_by(col) + + limit = data_dict.get('limit') + if limit: + query = query.limit(limit) + + offset = data_dict.get('offset') + if offset: + query = query.offset(offset) return list(zip(*query.execute())[0]) def current_package_list_with_resources(context, data_dict): From bc18309f7234fdc1e28ffa23dea0af99c25164dd Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 12 Aug 2013 12:01:52 +0200 Subject: [PATCH 48/88] [#938] Automatically push data with datapusher --- ckanext/datastore/plugin.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 279f59b8b6f..16926df3d80 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -20,6 +20,7 @@ class DatastorePlugin(p.SingletonPlugin): p.implements(p.IConfigurable, inherit=True) p.implements(p.IActions) p.implements(p.IAuthFunctions) + p.implements(p.IResourceUrlChange) p.implements(p.IDomainObjectModification, inherit=True) p.implements(p.IRoutes, inherit=True) p.implements(p.IResourceController, inherit=True) @@ -95,12 +96,32 @@ def new_resource_show(context, data_dict): self.resource_show_action = new_resource_show - def notify(self, entity, operation): + def notify(self, entity, operation=None): + ''' + if not isinstance(entity, model.Resource): + return + if operation: + if operation == model.domain_object.DomainObjectOperation.new: + self._create_datastorer_task(entity) + else: + # if operation is None, resource URL has been changed, as the + # notify function in IResourceUrlChange only takes 1 parameter + self._create_datastorer_task(entity) + ''' + context = {'model': model, 'ignore_auth': True} + if isinstance(entity, model.Resource): + if (operation == model.domain_object.DomainObjectOperation.new + or not operation): + # if operation is None, resource URL has been changed, as + # the notify function in IResourceUrlChange only takes + # 1 parameter + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': entity.id + }) if not isinstance(entity, model.Package) or self.legacy_mode: return # if a resource is new, it cannot have a datastore resource, yet if operation == model.domain_object.DomainObjectOperation.changed: - context = {'model': model, 'ignore_auth': True} if entity.private: func = p.toolkit.get_action('datastore_make_private') else: From fe0c1751dec057fd25d4a9e5f350d443266611ff Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 12 Aug 2013 12:02:57 +0200 Subject: [PATCH 49/88] [#938] Don't push private resources automatically --- ckanext/datastore/plugin.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 16926df3d80..bf09c70671b 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -115,9 +115,13 @@ def notify(self, entity, operation=None): # if operation is None, resource URL has been changed, as # the notify function in IResourceUrlChange only takes # 1 parameter - p.toolkit.get_action('datapusher_submit')(context, { - 'resource_id': entity.id + package = p.toolkit.get_action('package_show')(context, { + 'id': entity.get_package_id() }) + if not package['private']: + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': entity.id + }) if not isinstance(entity, model.Package) or self.legacy_mode: return # if a resource is new, it cannot have a datastore resource, yet From c67e2f3611b602167f6762bab9565eb2117902bb Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 12 Aug 2013 12:13:10 +0200 Subject: [PATCH 50/88] [#938] Only push certain formats to datapusher --- ckanext/datastore/plugin.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index bf09c70671b..4477ad70526 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -11,6 +11,8 @@ log = logging.getLogger(__name__) _get_or_bust = logic.get_or_bust +DEFAULT_FORMATS = [] + class DatastoreException(Exception): pass @@ -40,6 +42,9 @@ def configure(self, config): # datastore runs on PG prior to 9.0 (for example 8.4). self.legacy_mode = 'ckan.datastore.read_url' not in self.config + datapusher_formats = config.get('datapusher.formats', '').split() + self.datapusher_formats = datapusher_formats or DEFAULT_FORMATS + # Check whether we are running one of the paster commands which means # that we should ignore the following tests. if sys.argv[0].split('/')[-1] == 'paster' and 'datastore' in sys.argv[1:]: @@ -118,7 +123,8 @@ def notify(self, entity, operation=None): package = p.toolkit.get_action('package_show')(context, { 'id': entity.get_package_id() }) - if not package['private']: + if (not package['private'] and + entity.format in self.datapusher_formats): p.toolkit.get_action('datapusher_submit')(context, { 'resource_id': entity.id }) From 2ce606d2ad9bb6ae88d8f4c9f975e3e5af274fb8 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 12 Aug 2013 12:18:37 +0200 Subject: [PATCH 51/88] [#938] Start documentation about datapusher configuration --- ckan/config/deployment.ini_tmpl | 6 ++++++ doc/configuration.rst | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 738ff22858d..97c4f9412ba 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -133,6 +133,12 @@ ckan.feeds.author_link = #ofs.aws_access_key_id = .... #ofs.aws_secret_access_key = .... +## Datapusher settings + +# Make sure you have set up the DataStore + +datapusher.formats = csv +datapusher.url = http://datapusher.ckan.org/ ## Activity Streams Settings diff --git a/doc/configuration.rst b/doc/configuration.rst index ac6081899d9..284dbbfde76 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -1062,6 +1062,30 @@ Only used with the Google storage backend. .. todo:: Expand +DataPusher Settings +------------------- + +.. _datapusher.formats: + +datapusher.formats +^^^^^^^^^^^^^^^^^^ + +Example:: + datapusher.formats = csv xls xlsx + +.. todo:: Expand + +.. _datapusher.url + +datapusher.url +^^^^^^^^^^^^^^ + +Example:: + datapusher.url = http://datapusher.ckan.org/ + +.. todo:: Expand + + Activity Streams Settings ------------------------- From 8e9238a7f3479e1903cadc3b5769783db6804a96 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 12 Aug 2013 13:43:18 +0200 Subject: [PATCH 52/88] [#938] Raise exception when posting to the datapusher fails --- ckanext/datastore/logic/action.py | 39 ++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 03fdbbd3b58..6021c4da700 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -466,15 +466,36 @@ def datapusher_submit(context, data_dict): ver=3, qualified=True) user = p.toolkit.get_action('user_show')(context, {'id': context['user']}) - requests.post(urlparse.urljoin(datapusher_url, 'job'), data=json.dumps({ - 'api_key': user['apikey'], - 'job_type': 'push_to_datastore', - 'result_url': callback_url, - 'metadata': { - 'ckan_url': pylons.config['ckan.site_url'], - 'resource_id': res_id - } - })) + try: + r = requests.post( + urlparse.urljoin(datapusher_url, 'job'), + headers={ + #'Content-Type': 'application/json' + }, + data=json.dumps({ + 'api_key': user['apikey'], + 'job_type': 'push_to_datastore', + 'result_url': callback_url, + 'metadata': { + 'ckan_url': pylons.config['ckan.site_url'], + 'resource_id': res_id + } + })) + r.raise_for_status() + except requests.exceptions.ConnectionError, e: + raise p.toolkit.ValidationError({'datapusher': { + 'message': 'Could not connect to DataPusher.', + 'details': str(e)}}) + except requests.exceptions.HTTPError, e: + m = 'An Error occurred while sending the job: {0}'.format(e.message) + try: + body = e.response.json() + except ValueError: + body = e.response.text + raise p.toolkit.ValidationError({'datapusher': { + 'message': m, + 'details': body, + 'status_code': r.status_code}}) p.toolkit.get_action('task_status_update')(context, { 'entity_id': res_id, From da177c2b5f3f286776e06fe72241cd985985045d Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 13 Aug 2013 12:04:13 +0200 Subject: [PATCH 53/88] [#981] Add before_show from IResourceController interface to package_show as suggested by @kindly --- ckan/logic/action/get.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index fd03e7230d8..38695644df3 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -760,6 +760,10 @@ def package_show(context, data_dict): for item in plugins.PluginImplementations(plugins.IPackageController): item.read(pkg) + for resource_dict in package_dict['resources']: + for item in plugins.PluginImplementations(plugins.IResourceController): + resource_dict = item.before_show(resource_dict) + package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) if 'schema' in context: schema = context['schema'] From b2387881834dbd2780420b6c4979a7c041511f83 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 13 Aug 2013 12:41:25 +0200 Subject: [PATCH 54/88] [#1179] Add test for limit in package_list --- ckan/tests/logic/test_action.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 73a3b838f0c..860eb6245e7 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -60,8 +60,8 @@ def _add_basic_package(self, package_name=u'test_package', **kwargs): return json.loads(res.body)['result'] def test_01_package_list(self): - postparams = '%s=1' % json.dumps({}) - res = json.loads(self.app.post('/api/action/package_list', params=postparams).body) + res = json.loads(self.app.post('/api/action/package_list', + headers={'content-type': 'application/json'}).body) assert res['success'] is True assert len(res['result']) == 2 assert 'warandpeace' in res['result'] @@ -69,6 +69,13 @@ def test_01_package_list(self): assert res['help'].startswith( "Return a list of the names of the site's datasets (packages).") + postparams = '%s=1' % json.dumps({'limit': 1}) + res = json.loads(self.app.post('/api/action/package_list', + params=postparams).body) + assert res['success'] is True + assert len(res['result']) == 1 + assert 'warandpeace' in res['result'] or 'annakarenina' in res['result'] + # Test GET request res = json.loads(self.app.get('/api/action/package_list').body) assert len(res['result']) == 2 From 0d7470b507ff4a648876b4512b37059c23f98200 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 13 Aug 2013 14:57:28 +0200 Subject: [PATCH 55/88] [#938] Store job id and job key for datapusher jobs --- ckanext/datastore/logic/action.py | 36 +++++++++++++++++++------- ckanext/datastore/tests/test_create.py | 24 ++++++++++++++--- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 6021c4da700..c81a47ac740 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -470,7 +470,7 @@ def datapusher_submit(context, data_dict): r = requests.post( urlparse.urljoin(datapusher_url, 'job'), headers={ - #'Content-Type': 'application/json' + 'Content-Type': 'application/json' }, data=json.dumps({ 'api_key': user['apikey'], @@ -497,15 +497,22 @@ def datapusher_submit(context, data_dict): 'details': body, 'status_code': r.status_code}}) - p.toolkit.get_action('task_status_update')(context, { + empty_task = { 'entity_id': res_id, 'entity_type': 'resource', 'task_type': 'datapusher', - 'key': 'datapusher', - 'value': datapusher_url, 'last_updated': str(datetime.datetime.now()), 'state': 'pending' - }) + } + + tasks = [] + for (k, v) in [('job_id', r.json()['job_id']), + ('job_key', r.json()['job_key'])]: + t = empty_task.copy() + t['key'] = k + t['value'] = v + tasks.append(t) + p.toolkit.get_action('task_status_update_many')(context, {'data': tasks}) return True @@ -523,16 +530,25 @@ def datapusher_hook(context, data_dict): res_id = data_dict['metadata']['resource_id'] - task = p.toolkit.get_action('task_status_show')(context, { + task_id = p.toolkit.get_action('task_status_show')(context, { 'entity_id': res_id, 'task_type': 'datapusher', - 'key': 'datapusher' + 'key': 'job_id' }) - task['state'] = data_dict['status'] - task['last_updated'] = str(datetime.datetime.now()) + task_key = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': res_id, + 'task_type': 'datapusher', + 'key': 'job_key' + }) + + tasks = [task_id, task_key] + + for task in tasks: + task['state'] = data_dict['status'] + task['last_updated'] = str(datetime.datetime.now()) - p.toolkit.get_action('task_status_update')(context, task) + p.toolkit.get_action('task_status_update_many')(context, {'data': tasks}) def _resource_exists(context, data_dict): diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 7692373c2ef..589a6c26e3a 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -613,7 +613,7 @@ def test_send_datapusher_creates_task(self): task = p.toolkit.get_action('task_status_show')(context, { 'entity_id': resource.id, 'task_type': 'datapusher', - 'key': 'datapusher' + 'key': 'job_id' }) assert task['state'] == 'pending', task @@ -630,8 +630,18 @@ def test_datapusher_hook(self): 'entity_id': resource.id, 'entity_type': 'resource', 'task_type': 'datapusher', - 'key': 'datapusher', - 'value': True, + 'key': 'job_id', + 'value': 'my_id', + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending' + }) + + p.toolkit.get_action('task_status_update')(context, { + 'entity_id': resource.id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'key': 'job_key', + 'value': 'my_key', 'last_updated': str(datetime.datetime.now()), 'state': 'pending' }) @@ -653,7 +663,13 @@ def test_datapusher_hook(self): task = tests.call_action_api( self.app, 'task_status_show', entity_id=resource.id, - task_type='datapusher', key='datapusher') + task_type='datapusher', key='job_id') + + assert task['state'] == 'success', task + + task = tests.call_action_api( + self.app, 'task_status_show', entity_id=resource.id, + task_type='datapusher', key='job_key') assert task['state'] == 'success', task From 7c9509474c2a2fefd6c3d7692f2164d26654b6b4 Mon Sep 17 00:00:00 2001 From: joetsoi Date: Tue, 13 Aug 2013 15:02:55 +0100 Subject: [PATCH 56/88] [#1179] doc strings for package_list --- ckan/logic/action/get.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 6e1955ef4a6..c9ff5224d9a 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -64,6 +64,13 @@ def site_read(context,data_dict=None): def package_list(context, data_dict): '''Return a list of the names of the site's datasets (packages). + :param limit: if given, the list of datasets will be broken into pages of + at most ``limit`` datasets per page and only one page will be returned + at a time (optional) + :type limit: int + :param offset: when ``limit`` is given, the offset to start returning packages from + :type offset: int + :rtype: list of strings ''' From c5184ee2d290daef4a534534ce9f59dbe99d27a9 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 13 Aug 2013 17:54:26 +0200 Subject: [PATCH 57/88] [#938] Set url_type if requested --- ckanext/datastore/logic/action.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index c81a47ac740..4b79157355e 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -436,9 +436,10 @@ def datapusher_submit(context, data_dict): :param resource_id: The resource id of the resource that the data should be imported in. The resource's URL will be used to get the data. :type resource_id: string - :param set_url_to_dump: If set to true, the URL of the resource will be set - to the :ref:`datastore dump ` URL after the data has been imported. - :type set_url_to_dump: boolean + :param set_url_type: If set to true, the ``url_type`` of the resource will + be set to ``datastore`` and the resource URL will automatically point + to the :ref:`datastore dump ` URL. (optional, default: False) + :type set_url_type: boolean Returns ``True`` if the job has been submitted and ``False`` if the job has not been submitted, i.e. when the datapusher is not configured. @@ -450,12 +451,9 @@ def datapusher_submit(context, data_dict): data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') - # TODO: handle set_url_to_dump - p.toolkit.check_access('datapusher_submit', context, data_dict) - datapusher_url = pylons.config.get( - 'datapusher.url', 'http://datapusher.ckan.org/') + datapusher_url = pylons.config.get('datapusher.url') # no datapusher url means the datapusher should not be used if not datapusher_url: @@ -478,7 +476,8 @@ def datapusher_submit(context, data_dict): 'result_url': callback_url, 'metadata': { 'ckan_url': pylons.config['ckan.site_url'], - 'resource_id': res_id + 'resource_id': res_id, + 'set_url_type': data_dict.get('set_url_type', False) } })) r.raise_for_status() @@ -521,7 +520,7 @@ def datapusher_hook(context, data_dict): """ Update datapusher task. This action is typically called by the datapusher whenever the status of a job changes. - Expects a job with ``status``, ``metadata``. + Expects a job with ``status`` and ``metadata`` with a ``resource_id``. """ # TODO: use a schema to validate From 6564b45ed344c1bc3d28a03486f6184cf9933de9 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 13 Aug 2013 18:38:11 +0200 Subject: [PATCH 58/88] [#938] Fix datapusher test --- ckanext/datastore/tests/test_create.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index 589a6c26e3a..d7b893235ed 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -556,6 +556,7 @@ def test_create_ckan_resource_in_package(self): @httpretty.activate def test_providing_res_with_url_calls_datapusher_correctly(self): + pylons.config['datapusher.url'] = 'http://datapusher.ckan.org' httpretty.HTTPretty.register_uri( httpretty.HTTPretty.POST, 'http://datapusher.ckan.org/job', From b958f93465051cb33b1ab7086c96b1b70aecf109 Mon Sep 17 00:00:00 2001 From: David Lemayian Date: Wed, 14 Aug 2013 01:11:08 +0300 Subject: [PATCH 59/88] Update copyright year to 2013 --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 06cc6161f3b..e2443cc94b6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -84,7 +84,7 @@ # General information about the project. project = u'CKAN Documentation' project_short_name = u'CKAN' -copyright = u'''© 2009-2012, Open Knowledge Foundation. +copyright = u'''© 2009-2013, Open Knowledge Foundation. Licensed under Creative Commons Attribution ShareAlike (Unported) v3.0 License.
From d4cac8b0b464d2c2675c08587d97177d13bcaba6 Mon Sep 17 00:00:00 2001 From: Nigel Babu Date: Wed, 14 Aug 2013 18:36:06 +0530 Subject: [PATCH 60/88] [#1180] Raise 404 for org or group if called wrong Raise a 404 if a group is called by organization_show and vice versa. --- ckan/logic/action/get.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index c0fe9639764..7e04f46d32e 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -850,6 +850,10 @@ def _group_or_org_show(context, data_dict, is_org=False): if group is None: raise NotFound + if is_org and not group.is_organization: + raise NotFound + if not is_org and group.is_organization: + raise NotFound if is_org: _check_access('organization_show',context, data_dict) From 1337eed8e6b003c4eb7967450e4659a9a2ea35f6 Mon Sep 17 00:00:00 2001 From: kindly Date: Thu, 15 Aug 2013 09:02:07 +0100 Subject: [PATCH 61/88] [#1192] ust get to get headers --- ckan/config/middleware.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/config/middleware.py b/ckan/config/middleware.py index bdf1ff016a0..4df0aa84cb0 100644 --- a/ckan/config/middleware.py +++ b/ckan/config/middleware.py @@ -338,8 +338,8 @@ def __call__(self, environ, start_response): key = ''.join([ environ['HTTP_USER_AGENT'], environ['REMOTE_ADDR'], - environ['HTTP_ACCEPT_LANGUAGE'], - environ['HTTP_ACCEPT_ENCODING'], + environ.get('HTTP_ACCEPT_LANGUAGE', ''), + environ.get('HTTP_ACCEPT_ENCODING', ''), ]) key = hashlib.md5(key).hexdigest() # store key/data here From f674d2aa525daca747b8b1145858b53a06b1fec8 Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 15 Aug 2013 15:53:32 +0100 Subject: [PATCH 62/88] [#1184] Remove auth_sysadmins_check decorator from package_create There is no reason why sysadmins should need to use the auth function. --- ckan/logic/auth/create.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ckan/logic/auth/create.py b/ckan/logic/auth/create.py index bf9c3d17ea3..b7c8860c7a6 100644 --- a/ckan/logic/auth/create.py +++ b/ckan/logic/auth/create.py @@ -4,10 +4,9 @@ from ckan.common import _ -@logic.auth_sysadmins_check def package_create(context, data_dict=None): user = context['user'] - if not new_authz.auth_is_registered_user(): + if not new_authz.auth_is_registered_user() and not user: check1 = new_authz.check_config_permission('anon_create_dataset') else: check1 = new_authz.check_config_permission('create_dataset_if_not_in_organization') \ From 243cd1c68435fe9b121bd3ead3f774bef21dc5c8 Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 15 Aug 2013 18:24:43 +0100 Subject: [PATCH 63/88] [#1184] Update user checks in package create and update auth functions The `auth_is_registered_user` function's name is misleading, as it only checks if there is a user on the Pylons context object (ie if it is logged in). It has been renamed to `auth_is_loggedin_user`, keeping the old as deprecated. The function is not used anymore on the auth functions, as the user should be always present in the context dict passed to the functions (The controller sets context['user'] to c.user). --- ckan/logic/auth/create.py | 4 ++-- ckan/logic/auth/update.py | 2 +- ckan/new_authz.py | 10 +++++++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ckan/logic/auth/create.py b/ckan/logic/auth/create.py index b7c8860c7a6..dd102083591 100644 --- a/ckan/logic/auth/create.py +++ b/ckan/logic/auth/create.py @@ -6,7 +6,7 @@ def package_create(context, data_dict=None): user = context['user'] - if not new_authz.auth_is_registered_user() and not user: + if not user: check1 = new_authz.check_config_permission('anon_create_dataset') else: check1 = new_authz.check_config_permission('create_dataset_if_not_in_organization') \ @@ -31,7 +31,7 @@ def package_create(context, data_dict=None): def file_upload(context, data_dict=None): user = context['user'] - if not new_authz.auth_is_registered_user(): + if not user: return {'success': False, 'msg': _('User %s not authorized to create packages') % user} return {'success': True} diff --git a/ckan/logic/auth/update.py b/ckan/logic/auth/update.py index 59ec935a52a..c5865549007 100644 --- a/ckan/logic/auth/update.py +++ b/ckan/logic/auth/update.py @@ -23,7 +23,7 @@ def package_update(context, data_dict): ) else: # If dataset is not owned then we can edit if config permissions allow - if new_authz.auth_is_registered_user(): + if user: check1 = new_authz.check_config_permission( 'create_dataset_if_not_in_organization') else: diff --git a/ckan/new_authz.py b/ckan/new_authz.py index 01fb07927ac..b3943771382 100644 --- a/ckan/new_authz.py +++ b/ckan/new_authz.py @@ -9,6 +9,8 @@ import ckan.model as model from ckan.common import OrderedDict, _, c +import ckan.lib.maintain as maintain + log = getLogger(__name__) @@ -334,8 +336,14 @@ def check_config_permission(permission): return CONFIG_PERMISSIONS[permission] return False - +@maintain.deprecated('Use auth_is_loggedin_user instead') def auth_is_registered_user(): + ''' + This function is deprecated, please use the auth_is_loggedin_user instead + ''' + return auth_is_loggedin_user() + +def auth_is_loggedin_user(): ''' Do we have a logged in user ''' try: context_user = c.user From 1117ca25e0a675cfc9024165e412899f0615fee0 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 16 Aug 2013 16:52:48 +0100 Subject: [PATCH 64/88] [#1184] Fix failing test due to not providing the user --- ckan/tests/functional/test_package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/tests/functional/test_package.py b/ckan/tests/functional/test_package.py index 1cff46110ec..662ad0c2458 100644 --- a/ckan/tests/functional/test_package.py +++ b/ckan/tests/functional/test_package.py @@ -1083,7 +1083,7 @@ def test_new(self): prefix = '' fv[prefix + 'name'] = 'annakarenina' self.pkg_names.append('annakarenina') - res = fv.submit('save') + res = fv.submit('save', extra_environ=self.extra_environ_tester) assert not 'Error' in res, res def test_new_bad_name(self): From 11eff7eb3cfe8bc6627a84cdf45170e574a9cdb2 Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 16 Aug 2013 17:44:48 +0100 Subject: [PATCH 65/88] [#1184] Revert 243cd1c with a better anon user check Due to how the controllers set up the user in the context it was impossible with the current logic to identify an anonymous request on the auth functions. On ckan/lib/base.py:232 the following are set on the pylons context object (c): * Anon request (not logged in): c.user = None c.author = IP (or 'Unknown IP Address') * Logged in user: c.user = User name c.author = User name Once in the controllers, these are normally used in: context = {'user': c.user or c.auhtor} That means that once in the auth functions we need way to check if a call is anonymous that works both for requests made via the web (object c) or called directly from an extension, where the user is defined directly on the context. The new `auth_is_anon_user` function does that. Ideally this should be handled automatically at a higher level, and the logic layer should always work with users defined on the context object. --- ckan/logic/auth/create.py | 5 +++-- ckan/logic/auth/update.py | 2 +- ckan/new_authz.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/ckan/logic/auth/create.py b/ckan/logic/auth/create.py index dd102083591..e96b789bcf5 100644 --- a/ckan/logic/auth/create.py +++ b/ckan/logic/auth/create.py @@ -6,7 +6,8 @@ def package_create(context, data_dict=None): user = context['user'] - if not user: + + if new_authz.auth_is_anon_user(context): check1 = new_authz.check_config_permission('anon_create_dataset') else: check1 = new_authz.check_config_permission('create_dataset_if_not_in_organization') \ @@ -31,7 +32,7 @@ def package_create(context, data_dict=None): def file_upload(context, data_dict=None): user = context['user'] - if not user: + if new_authz.auth_is_anon_user(context): return {'success': False, 'msg': _('User %s not authorized to create packages') % user} return {'success': True} diff --git a/ckan/logic/auth/update.py b/ckan/logic/auth/update.py index c5865549007..5f8f4b03c25 100644 --- a/ckan/logic/auth/update.py +++ b/ckan/logic/auth/update.py @@ -23,7 +23,7 @@ def package_update(context, data_dict): ) else: # If dataset is not owned then we can edit if config permissions allow - if user: + if not new_authz.auth_is_anon_user(context): check1 = new_authz.check_config_permission( 'create_dataset_if_not_in_organization') else: diff --git a/ckan/new_authz.py b/ckan/new_authz.py index b3943771382..00a7dfe0e17 100644 --- a/ckan/new_authz.py +++ b/ckan/new_authz.py @@ -350,3 +350,20 @@ def auth_is_loggedin_user(): except TypeError: context_user = None return bool(context_user) + +def auth_is_anon_user(context): + ''' Is this an anonymous user? + eg Not logged in if a web request and not user defined in context + if logic functions called directly + + See ckan/lib/base.py:232 for pylons context object logic + ''' + try: + is_anon_user = (not bool(c.user) and bool(c.author)) + except TypeError: + # No c object set, this is not a call done via the web interface, + # but directly, eg from an extension + context_user = context.get('user') + is_anon_user = not bool(context_user) + + return is_anon_user From a52c216f1f5c4cbf3e37b5acba1a3c74b5e49fd4 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 19 Aug 2013 11:28:10 +0200 Subject: [PATCH 66/88] [#1197] Add requirements file to fix the docs build on readthedocs --- dev-requirements.txt | 2 +- pip-requirements-docs.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 pip-requirements-docs.txt diff --git a/dev-requirements.txt b/dev-requirements.txt index a445712d239..9a1919bea2e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -# These are packages that required when running ckan tests +# These are packages that required when running ckan tests and building the docs -e git+https://github.com/okfn/ckanclient@a315a72eef74dda4831acd022ef84a1246803c73#egg=ckanclient-dev docutils==0.8.1 diff --git a/pip-requirements-docs.txt b/pip-requirements-docs.txt new file mode 100644 index 00000000000..e5f043cbb7a --- /dev/null +++ b/pip-requirements-docs.txt @@ -0,0 +1,4 @@ +# This will install the requirements used to build the docs + +-r requirements.txt +-r dev-requirements.txt From 5c5369403e401eb926bf57ab2e9efd31cf55a0ae Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 19 Aug 2013 12:01:33 +0200 Subject: [PATCH 67/88] [#1197] Remove duplicate requirement which might prevent builds on rtd --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 9a1919bea2e..c7c3e6f833c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,7 +3,7 @@ -e git+https://github.com/okfn/ckanclient@a315a72eef74dda4831acd022ef84a1246803c73#egg=ckanclient-dev docutils==0.8.1 httpretty==0.6.2 -nose==1.3.0 +# nose==1.3.0 # already in requirements.txt pep8==1.4.6 Sphinx==1.2b1 polib==1.0.3 From 7aa597b498aea6fb8f0ca2d7f193ebafbbc7cbb9 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 19 Aug 2013 14:02:40 +0100 Subject: [PATCH 68/88] [#1184] Check if user exists before filling creator_user_id In some cases, eg anonymous package creation, users won't actually exist, so we need to avoid a KeyError. --- ckan/logic/action/create.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 1fc0fb6e058..fcb55ceae64 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -155,8 +155,9 @@ def package_create(context, data_dict): admins = [] if user: user_obj = model.User.by_name(user.decode('utf8')) - admins = [user_obj] - data['creator_user_id'] = user_obj.id + if user_obj: + admins = [user_obj] + data['creator_user_id'] = user_obj.id pkg = model_save.package_dict_save(data, context) From f4306eebe0cbbddf708e33e9f0189c023ab3cf5e Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 21 Aug 2013 16:07:55 +0100 Subject: [PATCH 69/88] [#1197] Downgrade Sphinx to fix search on RTD There is a bug on RTD that prevents the search when using Sphinx 1.2b1. All details here: https://github.com/rtfd/readthedocs.org/issues/443 --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index c7c3e6f833c..2c1c6cb5041 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,5 +5,5 @@ docutils==0.8.1 httpretty==0.6.2 # nose==1.3.0 # already in requirements.txt pep8==1.4.6 -Sphinx==1.2b1 +Sphinx==1.1.3 polib==1.0.3 From 1871a73cfcc1889768cda6f82cf03b1c46942eda Mon Sep 17 00:00:00 2001 From: kindly Date: Thu, 22 Aug 2013 02:51:19 +0100 Subject: [PATCH 70/88] [#1078] metadata_modified matching so that only get out from solr when datetimes are similar and minor exception cleanup --- ckan/lib/search/query.py | 5 +++-- ckan/logic/action/get.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 21ab4261607..881ddc96938 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -279,11 +279,12 @@ def get_index(self,reference): data = json.loads(solr_response) if data['response']['numFound'] == 0: - raise SearchError('Dataset not found in the search index: %s' % reference) + raise SearchError('Dataset not found in the search index: %s' % reference) else: return data['response']['docs'][0] except Exception, e: - log.exception(e) + if not isinstance(e, SearchError): + log.exception(e) raise SearchError(e) finally: conn.close() diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 88f0020b94b..ebb73c90262 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -786,7 +786,11 @@ def package_show(context, data_dict): else: package_dict = json.loads(search_result['data_dict']) package_dict_validated = False - if pkg.revision_id != package_dict.get('revision_id'): + metadata_modified = pkg.metadata_modified.isoformat() + search_metadata_modified = search_result['metadata_modified'] + # solr stores less precice datetime, + # truncate to 22 charactors to get good enough match + if metadata_modified[:22] != search_metadata_modified[:22]: package_dict = None if not package_dict: From 3b1b86838c4a7101e33c4b5babd059454efb4a70 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:25:55 +0200 Subject: [PATCH 71/88] [#1211] Add exception guidelines to Python coding standards --- doc/python-coding-standards.rst | 69 +++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 8e4f590bfae..05b973fe055 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -104,6 +104,75 @@ as it changes over time. So: - Keep docstrings simple: use plain, concise English. - Try to avoid repetition. + +Documenting exceptions raised with ``:raises`` +`````````````````````````````````````````````` + +There are a few guidelines that CKAN code should follow regarding exceptions: + +1. **All public functions that CKAN exports for third-party code to use + should document any exceptions they raise**. See below for how to document + exceptions raised. + + For example the template helper functions in :py:mod:`ckan.lib.helpers`, + anything imported into :py:mod:`ckan.plugins.toolkit`, and all of the + action API functions defined in :py:mod:`ckan.logic.action`, should list + exceptions raised in their docstrings. + + This is because CKAN themes, extensions and API clients need to be able to + call CKAN code without crashing, so they need to know what exceptions they + should handle (and extension developers shouldn't have to understand the + CKAN core source code). + +2. On the other hand, **internal functions that are only used within CKAN + shouldn't list exceptions in their docstrings**. + + This is because it would be difficult to keep all the exception lists up to + date with the actual code behaviour, so the docstrings would become more + misleading than useful. + +3. **Code should only raise exceptions from within its allowed set**. + + Each module in CKAN has a set of zero or more exceptions, defined somewhere + near the module, that code in that module is allowed to raise. For example + ``ckan/logic/__init__.py`` defines a number of exception types for code + in ``ckan/logic/`` to use. CKAN code should never raise exceptions types + defined elsewhere in CKAN, in third-party code or in the Python standard + library. + +4. **All code should catch any exceptions raised by called functions**, and + either handle the exception, re-raise the exception (if it's from the code's + set of allowed exception types), or wrap the exception in an allowed + exception type and re-raise it. + + This is to make it easy for a CKAN core developer to look at the source code + of an internal function, scan it for the keyword ``raise``, and see what + types of exception the function may raise, so they know what exceptions they + need to catch if they're going to call the function. Developers shouldn't + have to read the source of all the functions that a function calls (and + the functions they call...) to find out what exceptions she needs to catch + to call a function without crashing. + +.. todo:: + + Insert examples of how to re-raise and how to wrap-and-re-raise an + exception. + +Use ``:raises:`` to document exceptions raised by public functions. The +docstring should say what type of exception is raised and under what +conditions. Use ``:py:class:`` to reference exception types. For example:: + + def member_list(context, data_dict=None): + '''Return the members of a group. + + ... (parameters and return values documented here) ... + + :raises: :py:class:`ckan.logic.NotFound`: if the group doesn't exist + + ''' + + + PEP 257 (Docstring Conventions) ``````````````````````````````` From 13bd60c3014f01df5ad44b4bdc2e3df46257f532 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:27:21 +0200 Subject: [PATCH 72/88] Add logging example to Python coding standards --- doc/python-coding-standards.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 05b973fe055..6256e06d7bd 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -61,6 +61,17 @@ Imports Logging ------- +We use `the Python standard library's logging module `_ +to log messages in CKAN, e.g.:: + + import logging + ... + logger = logging.getLogger(__name__) + ... + logger.debug('some debug message') + +When logging: + - Keep log messages short. - Don't include object representations in the log message. It *is* useful From 560f86787d635d1f27169ce3086c428c87178b5c Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:27:48 +0200 Subject: [PATCH 73/88] Correct some Sphinx formatting --- doc/python-coding-standards.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 6256e06d7bd..29381df0900 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -85,7 +85,7 @@ When logging: String Formatting ------------------ -Don't use the old `%s` style string formatting, e.g. ``"i am a %s" % sub``. +Don't use the old ``%s`` style string formatting, e.g. ``"i am a %s" % sub``. This kind of string formatting is not helpful for internationalization and is going away in Python 3. From 79f7deaa73c4dbb8a5a6d5fff8a083e8ebfb769e Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:28:00 +0200 Subject: [PATCH 74/88] Correct some grammar. --- doc/python-coding-standards.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 29381df0900..ee7b79d6e39 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -111,7 +111,7 @@ as it changes over time. So: - All modules and all public functions, classes and methods exported by a module should normally have docstrings (see `PEP 257`_). -- Keep docstrings short, describe only what's necessary and no more, +- Keep docstrings short, describe only what's necessary and no more. - Keep docstrings simple: use plain, concise English. - Try to avoid repetition. From 6164aea8ddb59d81b16d58d440cca0f9d0016f15 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:30:22 +0200 Subject: [PATCH 75/88] Fix the title styles in a docs file --- doc/python-coding-standards.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index ee7b79d6e39..e76f9185ac1 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -1,5 +1,5 @@ ======================= -Python Coding Standards +Python coding standards ======================= For Python code style follow `PEP 8`_ plus the guidelines below. @@ -12,7 +12,7 @@ Some good links about Python code style: - `Google Python Style Guide `_ -Commit Formatting Cleanups on master +Commit formatting cleanups on master ------------------------------------ Clean up formatting and PEP 8 issues on master, not on a feature branch. @@ -20,7 +20,7 @@ Unless of course you're changing that piece of code anyway. This will help avoid spurious merge conflicts, and aid in reading pull requests. -Use Single Quotes +Use single quotes ----------------- Use single-quotes for string literals, e.g. ``'my-identifier'``, *but* use @@ -82,7 +82,7 @@ When logging: .. _Python's Logging HOWTO: http://docs.python.org/2/howto/logging.html -String Formatting +String formatting ------------------ Don't use the old ``%s`` style string formatting, e.g. ``"i am a %s" % sub``. @@ -198,7 +198,7 @@ CKAN docstrings deviate from PEP 257 in a couple of ways: - We use Sphinx directives for documenting parameters, exceptions and return values (see below) -Sphinx Field Lists +Sphinx field lists `````````````````` Use `Sphinx field lists`_ for documenting the parameters, exceptions and @@ -262,7 +262,7 @@ You can also use a little inline `reStructuredText markup`_ in docstrings, e.g. .. _Action API Docstrings: -Action API Docstrings +Action API docstrings ````````````````````` Docstrings from CKAN's action API are processed with `autodoc`_ and @@ -311,7 +311,7 @@ Example of a ckan.logic.action API docstring: .. _Autodoc: http://sphinx.pocoo.org/ext/autodoc.html -Some Helpful Tools for Python Code Quality +Some helpful tools for Python code quality ------------------------------------------ There are various tools that can help you to check your Python code for PEP8 From 0a45c1e5df86f04ec66a805cc98900b7f357dce2 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:52:08 +0200 Subject: [PATCH 76/88] Add docstring guidelines for cross-referencing code objects --- doc/python-coding-standards.rst | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index e76f9185ac1..95d79b00dd8 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -116,6 +116,56 @@ as it changes over time. So: - Try to avoid repetition. +Referencing other code objects with ``:py:`` +-------------------------------------------- + +If you want to refer to another Python or JavaScript module, function or class +etc. in a docstring (or from a ``.rst`` file), use `Sphinx domain object +cross-references +`_, for +example:: + + See :py:mod:`ckan.lib.helpers`. + + See :py:func:`ckan.logic.action.create.package_create`. + + See :py:class:`ckan.logic.NotFound`. + +For the full list of types of cross-reference, see the +`Sphinx docs `_. + + +.. note:: + + These kinds of cross-references can also be used to reference other types + of object besides Python objects, for example `JavaScript objects `_ + or even command-line scripts and options and environment variables. See + `the Sphinx docs `_ for the full + details. + + +Cross-referencing objects like this means that Sphinx will style the reference +with the right CSS, and hyperlink the reference to the docs for the referenced +object. Sphinx can also generate error messages when non-existent objects are +referenced, which helps to keep the docs up to date as the code changes. + +.. tip:: + + Sphinx will render a cross-reference like + ``:py:func:`ckan.logic.action.create.package_create``` as the full name of + the function: :py:func:`ckan.logic.action.create.package_create`. If you want the + docs to contain only the local name of the function (e.g. just + :py:func:`~ckan.logic.action.create.package_create`), put a ``~`` at the + start:: + + :py:func:`~ckan.logic.action.create.package_create` + + (But you should always use the fully qualified name in your docstring or + ``*.rst`` file.) + + + + Documenting exceptions raised with ``:raises`` `````````````````````````````````````````````` From 08f28c2e758c4c40f4b9ce8d8cbc6dac8c325441 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:54:05 +0200 Subject: [PATCH 77/88] Remove formatting cleanups stuff from coding standards This coding standard was wrong --- doc/python-coding-standards.rst | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 95d79b00dd8..24e0aa98854 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -12,14 +12,6 @@ Some good links about Python code style: - `Google Python Style Guide `_ -Commit formatting cleanups on master ------------------------------------- - -Clean up formatting and PEP 8 issues on master, not on a feature branch. -Unless of course you're changing that piece of code anyway. This will help -avoid spurious merge conflicts, and aid in reading pull requests. - - Use single quotes ----------------- From b5fe93d4fe2cf0eda2880512bcc1c39448677c23 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:56:23 +0200 Subject: [PATCH 78/88] Reorder some docs subsections --- doc/python-coding-standards.rst | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 24e0aa98854..bbd6a2b6dd0 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -108,6 +108,21 @@ as it changes over time. So: - Try to avoid repetition. +PEP 257 (Docstring Conventions) +``````````````````````````````` + +Generally, follow `PEP 257`_ for docstrings. We'll only describe the ways that +CKAN differs from or extends PEP 257 below. + +CKAN docstrings deviate from PEP 257 in a couple of ways: + +- We use ``'''triple single quotes'''`` around docstrings, not ``"""triple + double quotes"""`` (put triple single quotes around one-line docstrings as + well as multi-line ones, it makes them easier to expand later) +- We use Sphinx directives for documenting parameters, exceptions and return + values (see below) + + Referencing other code objects with ``:py:`` -------------------------------------------- @@ -156,8 +171,6 @@ referenced, which helps to keep the docs up to date as the code changes. ``*.rst`` file.) - - Documenting exceptions raised with ``:raises`` `````````````````````````````````````````````` @@ -225,21 +238,6 @@ conditions. Use ``:py:class:`` to reference exception types. For example:: ''' - -PEP 257 (Docstring Conventions) -``````````````````````````````` - -Generally, follow `PEP 257`_ for docstrings. We'll only describe the ways that -CKAN differs from or extends PEP 257 below. - -CKAN docstrings deviate from PEP 257 in a couple of ways: - -- We use ``'''triple single quotes'''`` around docstrings, not ``"""triple - double quotes"""`` (put triple single quotes around one-line docstrings as - well as multi-line ones, it makes them easier to expand later) -- We use Sphinx directives for documenting parameters, exceptions and return - values (see below) - Sphinx field lists `````````````````` @@ -285,7 +283,6 @@ Example of a longer docstring: ''' - The phrases that follow ``:param foo:``, ``:type foo:``, or ``:returns:`` should not start with capital letters or end with full stops. These should be short phrases and not full sentences. If more detail is required put it in the From 55aa9598641655d9abfb23b4d68a8098de5a3a6f Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 11:57:25 +0200 Subject: [PATCH 79/88] Docs: add a bullet-point --- doc/python-coding-standards.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index bbd6a2b6dd0..fafad029220 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -119,6 +119,8 @@ CKAN docstrings deviate from PEP 257 in a couple of ways: - We use ``'''triple single quotes'''`` around docstrings, not ``"""triple double quotes"""`` (put triple single quotes around one-line docstrings as well as multi-line ones, it makes them easier to expand later) +- We use Sphinx domain object cross-references to cross-reference to other + code objects (see below) - We use Sphinx directives for documenting parameters, exceptions and return values (see below) From 700cee77702e0b6a14e241fcc1f130430a212407 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 12:12:51 +0200 Subject: [PATCH 80/88] Add a cross-ref in the docs --- doc/documentation-guidelines.rst | 3 +++ doc/python-coding-standards.rst | 2 ++ 2 files changed, 5 insertions(+) diff --git a/doc/documentation-guidelines.rst b/doc/documentation-guidelines.rst index 809292e38b6..10a3050e1cf 100644 --- a/doc/documentation-guidelines.rst +++ b/doc/documentation-guidelines.rst @@ -386,6 +386,9 @@ or to define a URL once and then link to it in multiple places, do:: see `Hyperlinks `_ for details. +Use ``:py:`` to reference other Python or JavaScript functions, modules, +classes, etc. See :ref:`Referencing other code objects`. + .. _sphinx substitutions: diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index fafad029220..ad9d1877c0e 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -125,6 +125,8 @@ CKAN docstrings deviate from PEP 257 in a couple of ways: values (see below) +.. _Referencing other code objects: + Referencing other code objects with ``:py:`` -------------------------------------------- From 36f95afb1cd2fc91e2815c8a120d1229e813c9bd Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 17:04:55 +0200 Subject: [PATCH 81/88] [#973] Fix a broken Sphinx reference --- ckan/plugins/interfaces.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index de375b54ae2..3b40a15c3a2 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -222,7 +222,7 @@ def can_preview(self, data_dict): resource or the url if your preview requires the resource to be on the same domain because of the same origin policy. To find out how to preview resources that are on a - different domain, read :ref:`resource_proxy`. + different domain, read :ref:`resource-proxy`. ''' def setup_template_variables(self, context, data_dict): From 3f4db25d3a14ec66db8f0194c2595e725f131237 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 17:16:17 +0200 Subject: [PATCH 82/88] Fix broken IResourcePreview docstring Not spell-checked, not following CKAN's docstring guidelines, contained Sphinx errors. I also rewrote it for clarity. --- ckan/plugins/interfaces.py | 51 ++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 3b40a15c3a2..aa08fc64bab 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -196,33 +196,40 @@ def notify(self, resource): class IResourcePreview(Interface): - """ - Hook into the resource previews in helpers.py. This lets you - create custom previews for example for xml files. - """ + '''Add custom data previews for resource file-types. + ''' def can_preview(self, data_dict): - ''' - Returns info on whether the plugin can preview the resource. + '''Return info on whether the plugin can preview the resource. - This can be done in two ways. - The old way is to just return True or False. - The new way is to return a dict with the following - { - 'can_preview': bool - if the extension can preview the resource - 'fixable': string - if the extension cannot preview but could for - example if the resource_proxy was enabled. - 'quality': int - how good the preview is 1-poor, 2-average, 3-good - used if multiple extensions can preview - } + This can be done in two ways: - The ``data_dict`` contains the resource and the package. + 1. The old way is to just return ``True`` or ``False``. + + 2. The new way is to return a dict with three keys: + + ``'can_preview'`` (``boolean``) + ``True`` if the extension can preview the resource. + + ``'fixable'`` (``string``) + A string explaining how preview for the resource could be enabled, + for example if the ``resource_proxy`` plugin was enabled. + + ``'quality'`` (``int``) + How good the preview is: ``1`` (poor), ``2`` (average) or + ``3`` (good). When multiple preview extensions can preview the + same resource, this is used to determine which extension will + be used. + + :param data_dict: the resource to be previewed and the dataset that it + belongs to. + :type data_dict: dictionary + + Make sure to check the ``on_same_domain`` value of the resource or the + url if your preview requires the resource to be on the same domain + because of the same-origin policy. To find out how to preview + resources that are on a different domain, read :ref:`resource-proxy`. - Make sure to ckeck the ``on_same_domain`` value of the - resource or the url if your preview requires the resource to be on - the same domain because of the same origin policy. - To find out how to preview resources that are on a - different domain, read :ref:`resource-proxy`. ''' def setup_template_variables(self, context, data_dict): From 6e9421d2e3ef5d78febe6e9e67a6363ba205100b Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Mon, 26 Aug 2013 17:22:56 +0200 Subject: [PATCH 83/88] Fix a Sphinx warning --- doc/configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/configuration.rst b/doc/configuration.rst index 284dbbfde76..0efe9573226 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -1075,7 +1075,7 @@ Example:: .. todo:: Expand -.. _datapusher.url +.. _datapusher.url: datapusher.url ^^^^^^^^^^^^^^ From e91bb956b44c242db2b9125f18a2aef6a82bee1a Mon Sep 17 00:00:00 2001 From: Nigel Babu Date: Tue, 27 Aug 2013 12:11:39 +0530 Subject: [PATCH 84/88] [#1180] Add tests --- ckan/tests/logic/test_action.py | 54 +++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 73a3b838f0c..691860afbb4 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -1644,3 +1644,57 @@ def test_02_bulk_delete(self): res = self.app.get('/api/action/package_search?q=*:*') assert json.loads(res.body)['result']['count'] == 0 + + +class TestGroupOrgView(WsgiAppCase): + + @classmethod + def setup_class(cls): + model.Session.add_all([ + model.User(name=u'sysadmin', apikey=u'sysadmin', + password=u'sysadmin', sysadmin=True), + ]) + model.Session.commit() + + org_dict = '%s=1' % json.dumps({ + 'name': 'org', + }) + res = cls.app.post('/api/action/organization_create', + extra_environ={'Authorization': 'sysadmin'}, + params=org_dict) + cls.org_id = json.loads(res.body)['result']['id'] + + group_dict = '%s=1' % json.dumps({ + 'name': 'group', + }) + res = cls.app.post('/api/action/group_create', + extra_environ={'Authorization': 'sysadmin'}, + params=group_dict) + cls.group_id = json.loads(res.body)['result']['id'] + + @classmethod + def teardown_class(self): + model.repo.rebuild_db() + + def test_1_view_org(self): + res = cls.app.post('/api/action/organization_show', + params=self.org_dict) + res_json = json.loads(res.body) + assert res['success'] = True + + res = cls.app.post('/api/action/group_show', + params=self.org_dict) + res_json = json.loads(res.body) + assert res['success'] = True + + def test_2_view_group(self): + res = cls.app.post('/api/action/group_show', + params=self.group_dict) + res_json = json.loads(res.body) + assert res['success'] = True + + res = cls.app.post('/api/action/organization_show', + params=self.group_dict) + res_json = json.loads(res.body) + assert res['success'] = True + From 68c0d821edfa4819d25876582660141bebaa309a Mon Sep 17 00:00:00 2001 From: Nigel Babu Date: Tue, 27 Aug 2013 12:38:42 +0530 Subject: [PATCH 85/88] Correct syntax --- ckan/tests/logic/test_action.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 691860afbb4..23ad1337da2 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -1680,21 +1680,21 @@ def test_1_view_org(self): res = cls.app.post('/api/action/organization_show', params=self.org_dict) res_json = json.loads(res.body) - assert res['success'] = True + assert res['success'] is True res = cls.app.post('/api/action/group_show', params=self.org_dict) res_json = json.loads(res.body) - assert res['success'] = True + assert res['success'] is True def test_2_view_group(self): res = cls.app.post('/api/action/group_show', params=self.group_dict) res_json = json.loads(res.body) - assert res['success'] = True + assert res['success'] is True res = cls.app.post('/api/action/organization_show', params=self.group_dict) res_json = json.loads(res.body) - assert res['success'] = True + assert res['success'] is True From 5acddc8c3baed91e235fe09bcbebfabaf33936e6 Mon Sep 17 00:00:00 2001 From: Nigel Babu Date: Tue, 27 Aug 2013 13:22:07 +0530 Subject: [PATCH 86/88] [#1180] Change typos and get the tests to pass --- ckan/tests/logic/test_action.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 23ad1337da2..e3bcaf7d94d 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -1677,24 +1677,24 @@ def teardown_class(self): model.repo.rebuild_db() def test_1_view_org(self): - res = cls.app.post('/api/action/organization_show', - params=self.org_dict) + res = self.app.post('/api/action/organization_show', + params=self.org_id) res_json = json.loads(res.body) assert res['success'] is True - res = cls.app.post('/api/action/group_show', - params=self.org_dict) + res = self.app.post('/api/action/group_show', + params=self.org_id) res_json = json.loads(res.body) - assert res['success'] is True + assert res['success'] is False def test_2_view_group(self): - res = cls.app.post('/api/action/group_show', - params=self.group_dict) + res = self.app.post('/api/action/group_show', + params=self.group_id) res_json = json.loads(res.body) assert res['success'] is True - res = cls.app.post('/api/action/organization_show', - params=self.group_dict) + res = self.app.post('/api/action/organization_show', + params=self.group_id) res_json = json.loads(res.body) - assert res['success'] is True + assert res['success'] is False From 2c45b77b3220e9cb0c41a6669e2ee97f1e2d30ec Mon Sep 17 00:00:00 2001 From: Nigel Babu Date: Tue, 27 Aug 2013 15:09:17 +0530 Subject: [PATCH 87/88] [#1180] *Really* fix tests --- ckan/tests/logic/test_action.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index e3bcaf7d94d..c80515f1027 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -1677,24 +1677,24 @@ def teardown_class(self): model.repo.rebuild_db() def test_1_view_org(self): - res = self.app.post('/api/action/organization_show', - params=self.org_id) + res = self.app.get('/api/action/organization_show', + params={'id': self.org_id}) res_json = json.loads(res.body) - assert res['success'] is True + assert res_json['success'] is True - res = self.app.post('/api/action/group_show', - params=self.org_id) + res = self.app.get('/api/action/group_show', + params={'id': self.org_id}, expect_errors=True) res_json = json.loads(res.body) - assert res['success'] is False + assert res_json['success'] is False def test_2_view_group(self): - res = self.app.post('/api/action/group_show', - params=self.group_id) + res = self.app.get('/api/action/group_show', + params={'id': self.group_id}) res_json = json.loads(res.body) - assert res['success'] is True + assert res_json['success'] is True - res = self.app.post('/api/action/organization_show', - params=self.group_id) + res = self.app.get('/api/action/organization_show', + params={'id': self.group_id}, expect_errors=True) res_json = json.loads(res.body) - assert res['success'] is False + assert res_json['success'] is False From 6fb6072d1daf3846767124623cab91ab659cd283 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Thu, 29 Aug 2013 11:46:52 +0200 Subject: [PATCH 88/88] [#1211] Correct a typo --- doc/python-coding-standards.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index ad9d1877c0e..ed396398f38 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -220,7 +220,7 @@ There are a few guidelines that CKAN code should follow regarding exceptions: types of exception the function may raise, so they know what exceptions they need to catch if they're going to call the function. Developers shouldn't have to read the source of all the functions that a function calls (and - the functions they call...) to find out what exceptions she needs to catch + the functions they call...) to find out what exceptions they needs to catch to call a function without crashing. .. todo::