From 34f3f18e8893b5fc33047df94c88d4852c9a1f6b Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 8 Aug 2016 12:01:29 +0100 Subject: [PATCH 01/65] [#3194] Consolidate a single redirect function Only use helpers.redirect_to (which uses routes.redirect_to), not pylons.redirect or pylons.redirect_to. This takes care of lang bits in the url and is the one exposed on the toolkit. --- ckan/controllers/package.py | 2 +- ckan/controllers/util.py | 2 +- ckan/lib/base.py | 1 - ckan/lib/helpers.py | 8 +++++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index 115a06981f2..be198c9265e 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -30,7 +30,7 @@ render = base.render abort = base.abort -redirect = base.redirect +redirect = h.redirect_to NotFound = logic.NotFound NotAuthorized = logic.NotAuthorized diff --git a/ckan/controllers/util.py b/ckan/controllers/util.py index 97c857b0ff5..4a41bfb0dc2 100644 --- a/ckan/controllers/util.py +++ b/ckan/controllers/util.py @@ -18,7 +18,7 @@ def redirect(self): base.abort(400, _('Missing Value') + ': url') if h.url_is_local(url): - return base.redirect(url) + return h.redirect_to(url) else: base.abort(403, _('Redirecting to external site is not allowed.')) diff --git a/ckan/lib/base.py b/ckan/lib/base.py index 3334a662599..d6e5ff2fb2d 100644 --- a/ckan/lib/base.py +++ b/ckan/lib/base.py @@ -11,7 +11,6 @@ from pylons import cache, session from pylons.controllers import WSGIController from pylons.controllers.util import abort as _abort -from pylons.controllers.util import redirect_to, redirect from pylons.decorators import jsonify from pylons.i18n import N_, gettext, ngettext from pylons.templating import cached_template, pylons_globals diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index fb90e5551c8..673954ca414 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -140,7 +140,13 @@ def redirect_to(*args, **kw): ''' if 
are_there_flash_messages(): kw['__no_cache__'] = True - return _redirect_to(url_for(*args, **kw)) + + # Routes router doesn't like unicode args + uargs = map(lambda arg: str(arg) if isinstance(arg, unicode) else arg, + args) + _url = url_for(*uargs, **kw) + + return _redirect_to(_url) @maintain.deprecated('h.url is deprecated please use h.url_for') From 7a0623c7be0598db3aab7604ca7b1014157cf871 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 8 Aug 2016 12:16:10 +0100 Subject: [PATCH 02/65] [#3194] Ensure that relative redirects use the host on ckan.site_url --- ckan/lib/helpers.py | 2 ++ ckan/tests/controllers/test_tags.py | 2 +- ckan/tests/controllers/test_util.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 673954ca414..ccccf43ff8c 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -145,6 +145,8 @@ def redirect_to(*args, **kw): uargs = map(lambda arg: str(arg) if isinstance(arg, unicode) else arg, args) _url = url_for(*uargs, **kw) + if _url.startswith('/'): + _url = str(config['ckan.site_url'].rstrip('/') + _url) return _redirect_to(_url) diff --git a/ckan/tests/controllers/test_tags.py b/ckan/tests/controllers/test_tags.py index 77d7e083311..46b4b255b6f 100644 --- a/ckan/tests/controllers/test_tags.py +++ b/ckan/tests/controllers/test_tags.py @@ -124,7 +124,7 @@ def test_tag_read_redirects_to_dataset_search(self): tag_url = url_for(controller='tag', action='read', id='find-me') tag_response = app.get(tag_url, status=302) assert_equal(tag_response.headers['Location'], - 'http://localhost/dataset?tags=find-me') + 'http://test.ckan.net/dataset?tags=find-me') def test_tag_read_not_found(self): '''Attempting access to non-existing tag returns a 404''' diff --git a/ckan/tests/controllers/test_util.py b/ckan/tests/controllers/test_util.py index adb02e03657..db76196e677 100644 --- a/ckan/tests/controllers/test_util.py +++ b/ckan/tests/controllers/test_util.py @@ -18,7 +18,7 @@ def 
test_redirect_ok(self): status=302, ) assert_equal(response.headers.get('Location'), - 'http://localhost/dataset') + 'http://test.ckan.net/dataset') def test_redirect_external(self): app = self._get_test_app() From 901af2757f9599a61bf136830d792d556a9b2069 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 8 Aug 2016 12:39:52 +0100 Subject: [PATCH 03/65] [#3194] Set HTTP_HOST to ckan.site_url on submit_and_follow So redirects use the correct host --- ckan/tests/helpers.py | 7 +++++++ ckanext/example_igroupform/tests/test_controllers.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index bfda83a1a1e..0c3910b1a75 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -19,6 +19,7 @@ This module is reserved for these very useful functions. ''' +import urlparse import webtest import nose.tools from nose.tools import assert_in, assert_not_in @@ -213,6 +214,12 @@ def submit_and_follow(app, form, extra_environ=None, name=None, Call webtest_submit with name/value passed expecting a redirect and return the response from following that redirect. 
''' + if extra_environ is None: + extra_environ = {} + if not extra_environ.get('HTTP_HOST'): + extra_environ['HTTP_HOST'] = str( + urlparse.urlparse(config['ckan.site_url']).netloc) + response = webtest_submit(form, name, value=value, status=302, extra_environ=extra_environ, **args) return app.get(url=response.headers['Location'], diff --git a/ckanext/example_igroupform/tests/test_controllers.py b/ckanext/example_igroupform/tests/test_controllers.py index be598a13aae..6f64973ff5a 100644 --- a/ckanext/example_igroupform/tests/test_controllers.py +++ b/ckanext/example_igroupform/tests/test_controllers.py @@ -136,7 +136,7 @@ def test_save(self): response = submit_and_follow(app, form, env, 'save') # check correct redirect assert_equal(response.req.url, - 'http://localhost/%s/saved' % custom_group_type) + 'http://test.ckan.net/%s/saved' % custom_group_type) # check saved ok group = model.Group.by_name(u'saved') assert_equal(group.title, u'') @@ -172,7 +172,7 @@ def test_save(self): response = submit_and_follow(app, form, env, 'save') # check correct redirect assert_equal(response.req.url, - 'http://localhost/%s/saved' % group_type) + 'http://test.ckan.net/%s/saved' % group_type) # check saved ok group = model.Group.by_name(u'saved') assert_equal(group.title, u'') From dae599a211f358855b3f5ecaed7fd30aea164824 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 8 Aug 2016 12:40:48 +0100 Subject: [PATCH 04/65] Use own url_for rather than routes' As we handle the host correctly. 
This will be done to all tests when the new url_for work is merged --- ckan/tests/legacy/functional/test_user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/tests/legacy/functional/test_user.py b/ckan/tests/legacy/functional/test_user.py index f61b8be35c7..d432e0789ac 100644 --- a/ckan/tests/legacy/functional/test_user.py +++ b/ckan/tests/legacy/functional/test_user.py @@ -1,6 +1,6 @@ # encoding: utf-8 -from routes import url_for +from ckan.lib.helpers import url_for from nose.tools import assert_equal from ckan.common import config import hashlib From b67555f0ca7d6e3546f03c5eb714208f496371fa Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Thu, 18 Aug 2016 12:26:52 -0400 Subject: [PATCH 05/65] [#3192] IPermissionLabels interface stub --- ckan/plugins/interfaces.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 2651c394a87..74344214db1 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -25,7 +25,8 @@ 'IFacets', 'IAuthenticator', 'ITranslation', - 'IUploader' + 'IUploader', + 'IPermissionLabels', ] from inspect import isclass @@ -1568,3 +1569,37 @@ def get_resource_uploader(self): :type id: string ''' + + +class IPermissionLabels(Interface): + ''' + Extensions implementing this interface can override the permission + labels applied to datasets to precisely control which datasets are + visible to each user. + ''' + + def get_dataset_labels(self, dataset): + ''' + Return a list of unicode strings to be stored in the search index + as the permission lables for a dataset dict. + + :param dataset: dataset fields + :type dataset: dict + + :returns: permission labels + :rtype: list of unicode strings + ''' + + def get_user_dataset_labels(self, user): + ''' + Return the permission labels that give a user permission to view + a dataset. 
If any of the labels returned from this method match + any of the labels returned from :py:meth:`.get_dataset_labels` + then this user is permitted to view that dataset. + + :param user: user fields + :type user: dict + + :returns: permission labels + :rtype: list of unicode strings + ''' From 8ff3f1ffdc30675af0c04ec0a2eb32409fd515b8 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Thu, 18 Aug 2016 17:29:53 -0400 Subject: [PATCH 06/65] [#3192] DefaultPermissionLabels --- ckan/lib/plugins.py | 29 +++++++++++++++++++++++++++++ ckan/plugins/interfaces.py | 4 ++++ 2 files changed, 33 insertions(+) diff --git a/ckan/lib/plugins.py b/ckan/lib/plugins.py index 0f0067bb901..f5cc625612c 100644 --- a/ckan/lib/plugins.py +++ b/ckan/lib/plugins.py @@ -574,3 +574,32 @@ def i18n_domain(self): ckanext-{extension name}, hence your pot, po and mo files should be named ckanext-{extension name}.mo''' return 'ckanext-{name}'.format(name=self.name) + + +class DefaultPermissionLabels(object): + u''' + Default permissions for package_search/package_show: + - everyone can read public datasets "public" + - users can read their own drafts "creator-(user id)" + - users can read datasets belonging to their orgs "member-(org id)" + ''' + def get_dataset_labels(self, dataset): + if dataset['state'] == 'active' and not dataset['private']: + return [u'public'] + + if dataset['private']: + return [u'member-%s' % dataset['owner_org']] + + return [u'creator-%s' % dataset['creator_user_id']] + + def get_user_dataset_labels(self, user): + labels = [u'public'] + if not user: + return labels + + labels.append(u'creator-%s' % user['id']) + + orgs = logic.get_action('organization_list_for_user')( + {'user': user['id']}, {'permission': 'read'}) + labels.extend(u'member-%s' % o['id'] for o in orgs) + return labels diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 74344214db1..623d9699e3e 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -1576,6 +1576,10 @@ 
class IPermissionLabels(Interface): Extensions implementing this interface can override the permission labels applied to datasets to precisely control which datasets are visible to each user. + + Implementations might want to consider mixing in + ckan.lib.plugins.DefaultPermissionLabels which provides + default behaviours for these methods. ''' def get_dataset_labels(self, dataset): From e34f74fb8b5a783320a4ddf2634f462d98017e7f Mon Sep 17 00:00:00 2001 From: amercader Date: Fri, 19 Aug 2016 16:23:28 +0100 Subject: [PATCH 07/65] [#3194] Consolidate some further redirect calls --- ckan/controllers/package.py | 45 +++++++++++++++--------------------- ckanext/datapusher/plugin.py | 4 ++-- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index be198c9265e..89013d0cd17 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -30,7 +30,6 @@ render = base.render abort = base.abort -redirect = h.redirect_to NotFound = logic.NotFound NotAuthorized = logic.NotAuthorized @@ -573,8 +572,8 @@ def resource_edit(self, id, resource_id, data=None, errors=None, errors, error_summary) except NotAuthorized: abort(403, _('Unauthorized to edit this resource')) - redirect(h.url_for(controller='package', action='resource_read', - id=id, resource_id=resource_id)) + h.redirect_to(controller='package', action='resource_read', id=id, + resource_id=resource_id) context = {'model': model, 'session': model.Session, 'api_version': 3, 'for_edit': True, @@ -643,8 +642,7 @@ def new_resource(self, id, data=None, errors=None, error_summary=None): if not data_provided and save_action != "go-dataset-complete": if save_action == 'go-dataset': # go to final stage of adddataset - redirect(h.url_for(controller='package', - action='edit', id=id)) + h.redirect_to(controller='package', action='edit', id=id) # see if we have added any resources try: data_dict = get_action('package_show')(context, {'id': id}) @@ -659,8 
+657,8 @@ def new_resource(self, id, data=None, errors=None, error_summary=None): # On new templates do not use flash message if g.legacy_templates: h.flash_error(msg) - redirect(h.url_for(controller='package', - action='new_resource', id=id)) + h.redirect_to(controller='package', + action='new_resource', id=id) else: errors = {} error_summary = {_('Error'): msg} @@ -671,8 +669,7 @@ def new_resource(self, id, data=None, errors=None, error_summary=None): get_action('package_update')( dict(context, allow_state_change=True), dict(data_dict, state='active')) - redirect(h.url_for(controller='package', - action='read', id=id)) + h.redirect_to(controller='package', action='read', id=id) data['package_id'] = id try: @@ -696,20 +693,17 @@ def new_resource(self, id, data=None, errors=None, error_summary=None): get_action('package_update')( dict(context, allow_state_change=True), dict(data_dict, state='active')) - redirect(h.url_for(controller='package', - action='read', id=id)) + h.redirect_to(controller='package', action='read', id=id) elif save_action == 'go-dataset': # go to first stage of add dataset - redirect(h.url_for(controller='package', - action='edit', id=id)) + h.redirect_to(controller='package', action='edit', id=id) elif save_action == 'go-dataset-complete': # go to first stage of add dataset - redirect(h.url_for(controller='package', - action='read', id=id)) + h.redirect_to(controller='package', action='read', id=id) else: # add more resources - redirect(h.url_for(controller='package', - action='new_resource', id=id)) + h.redirect_to(controller='package', action='new_resource', + id=id) # get resources for sidebar context = {'model': model, 'session': model.Session, @@ -904,7 +898,7 @@ def _save_new(self, context, package_type=None): url = h.url_for(controller='package', action='new_resource', id=pkg_dict['name']) - redirect(url) + h.redirect_to(url) # Make sure we don't index this dataset if request.params['save'] not in ['go-resource', 'go-metadata']: @@ 
-921,7 +915,7 @@ def _save_new(self, context, package_type=None): url = h.url_for(controller='package', action='new_resource', id=pkg_dict['name']) - redirect(url) + h.redirect_to(url) self._form_save_redirect(pkg_dict['name'], 'new', package_type=package_type) @@ -1008,7 +1002,7 @@ def _form_save_redirect(self, pkgname, action, package_type=None): id=pkgname) else: url = h.url_for('{0}_read'.format(package_type), id=pkgname) - redirect(url) + h.redirect_to(url) def delete(self, id): @@ -1163,7 +1157,7 @@ def resource_download(self, id, resource_id, filename=None): return app_iter elif not 'url' in rsc: abort(404, _('No download is available')) - redirect(rsc['url']) + h.redirect_to(rsc['url']) def follow(self, id): '''Start following this dataset.''' @@ -1262,8 +1256,7 @@ def groups(self, id): get_action('member_delete')(context, data_dict) except NotFound: abort(404, _('Group not found')) - redirect(h.url_for(controller='package', - action='groups', id=id)) + h.redirect_to(controller='package', action='groups', id=id) context['is_member'] = True users_groups = get_action('group_list_authz')(context, data_dict) @@ -1475,9 +1468,9 @@ def edit_view(self, id, resource_id, view_id=None): abort(403, _('Unauthorized to edit resource')) else: if not to_preview: - redirect(h.url_for(controller='package', - action='resource_views', - id=id, resource_id=resource_id)) + h.redirect_to(controller='package', + action='resource_views', + id=id, resource_id=resource_id) ## view_id exists only when updating if view_id: diff --git a/ckanext/datapusher/plugin.py b/ckanext/datapusher/plugin.py index ea9a5fc8bb4..a7db0f66aa7 100644 --- a/ckanext/datapusher/plugin.py +++ b/ckanext/datapusher/plugin.py @@ -41,11 +41,11 @@ def resource_data(self, id, resource_id): except logic.ValidationError: pass - base.redirect(core_helpers.url_for( + core_helpers.redirect_to( controller='ckanext.datapusher.plugin:ResourceDataController', action='resource_data', id=id, - resource_id=resource_id) + 
resource_id=resource_id ) try: From c76644047209aaa41163c278976e685f435405e7 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 11:36:31 -0400 Subject: [PATCH 08/65] [#3192] implement package_show auth with permission labels --- ckan/lib/plugins.py | 25 ++++++++++++++++--------- ckan/logic/auth/get.py | 24 +++++++++--------------- ckan/plugins/interfaces.py | 12 ++++++------ 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/ckan/lib/plugins.py b/ckan/lib/plugins.py index f5cc625612c..0fbe63f53bb 100644 --- a/ckan/lib/plugins.py +++ b/ckan/lib/plugins.py @@ -243,6 +243,13 @@ def plugin_validate(plugin, context, data_dict, schema, action): return toolkit.navl_validate(data_dict, schema, context) +def get_permission_labels(): + '''Return the permission label plugin (or default implementation)''' + for plugin in plugins.PluginImplementations(plugins.IPermissionLabels): + return plugin + return DefaultPermissionLabels() + + class DefaultDatasetForm(object): '''The default implementation of :py:class:`~ckan.plugins.interfaces.IDatasetForm`. 
@@ -583,23 +590,23 @@ class DefaultPermissionLabels(object): - users can read their own drafts "creator-(user id)" - users can read datasets belonging to their orgs "member-(org id)" ''' - def get_dataset_labels(self, dataset): - if dataset['state'] == 'active' and not dataset['private']: + def get_dataset_labels(self, dataset_obj): + if dataset_obj.state == 'active' and not dataset_obj.private: return [u'public'] - if dataset['private']: - return [u'member-%s' % dataset['owner_org']] + if dataset_obj.owner_org: + return [u'member-%s' % dataset_obj.owner_org] - return [u'creator-%s' % dataset['creator_user_id']] + return [u'creator-%s' % dataset_obj.creator_user_id] - def get_user_dataset_labels(self, user): + def get_user_dataset_labels(self, user_obj): labels = [u'public'] - if not user: + if not user_obj: return labels - labels.append(u'creator-%s' % user['id']) + labels.append(u'creator-%s' % user_obj.id) orgs = logic.get_action('organization_list_for_user')( - {'user': user['id']}, {'permission': 'read'}) + {'user': user_obj.id}, {'permission': 'read'}) labels.extend(u'member-%s' % o['id'] for o in orgs) return labels diff --git a/ckan/logic/auth/get.py b/ckan/logic/auth/get.py index 011b19c2e84..c1abdb9e15c 100644 --- a/ckan/logic/auth/get.py +++ b/ckan/logic/auth/get.py @@ -5,6 +5,7 @@ from ckan.lib.base import _ from ckan.logic.auth import (get_package_object, get_group_object, get_resource_object) +from ckan.lib.plugins import get_permission_labels def sysadmin(context, data_dict): @@ -112,22 +113,15 @@ def package_relationships_list(context, data_dict): def package_show(context, data_dict): user = context.get('user') package = get_package_object(context, data_dict) - # draft state indicates package is still in the creation process - # so we need to check we have creation rights. 
- if package.state.startswith('draft'): - auth = authz.is_authorized('package_update', - context, data_dict) - authorized = auth.get('success') - elif package.owner_org is None and package.state == 'active': - return {'success': True} - else: - # anyone can see a public package - if not package.private and package.state == 'active': - return {'success': True} - authorized = authz.has_user_permission_for_group_or_org( - package.owner_org, user, 'read') + labels = get_permission_labels() + user_labels = labels.get_user_dataset_labels(context['auth_user_obj']) + authorized = any( + dl in user_labels for dl in labels.get_dataset_labels(package)) + if not authorized: - return {'success': False, 'msg': _('User %s not authorized to read package %s') % (user, package.id)} + return { + 'success': False, + 'msg': _('User %s not authorized to read package %s') % (user, package.id)} else: return {'success': True} diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 623d9699e3e..71d9aed8570 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -1582,27 +1582,27 @@ class IPermissionLabels(Interface): default behaviours for these methods. ''' - def get_dataset_labels(self, dataset): + def get_dataset_labels(self, dataset_obj): ''' Return a list of unicode strings to be stored in the search index as the permission lables for a dataset dict. - :param dataset: dataset fields - :type dataset: dict + :param dataset_obj: dataset details + :type dataset_obj: Package model object :returns: permission labels :rtype: list of unicode strings ''' - def get_user_dataset_labels(self, user): + def get_user_dataset_labels(self, user_obj): ''' Return the permission labels that give a user permission to view a dataset. If any of the labels returned from this method match any of the labels returned from :py:meth:`.get_dataset_labels` then this user is permitted to view that dataset. 
- :param user: user fields - :type user: dict + :param user_obj: user details + :type user_obj: User model object or None :returns: permission labels :rtype: list of unicode strings From 59661989d67281e123a7eeaf5d566f6fe19e92fa Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 12:01:40 -0400 Subject: [PATCH 09/65] [#3192] remove old solr schemas --- ckan/config/solr/schema-1.2.xml | 170 ---------------------------- ckan/config/solr/schema-1.3.xml | 179 ------------------------------ ckan/config/solr/schema-1.4.xml | 190 -------------------------------- ckan/config/solr/schema-2.0.xml | 173 ----------------------------- 4 files changed, 712 deletions(-) delete mode 100644 ckan/config/solr/schema-1.2.xml delete mode 100644 ckan/config/solr/schema-1.3.xml delete mode 100644 ckan/config/solr/schema-1.4.xml delete mode 100644 ckan/config/solr/schema-2.0.xml diff --git a/ckan/config/solr/schema-1.2.xml b/ckan/config/solr/schema-1.2.xml deleted file mode 100644 index 4f9b11a580b..00000000000 --- a/ckan/config/solr/schema-1.2.xml +++ /dev/null @@ -1,170 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -id -text - - - - - - - - - - - - - - - - - - - - diff --git a/ckan/config/solr/schema-1.3.xml b/ckan/config/solr/schema-1.3.xml deleted file mode 100644 index 89fea815356..00000000000 --- a/ckan/config/solr/schema-1.3.xml +++ /dev/null @@ -1,179 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -index_id -text - - - - - - - - - - - - - - - - - - - - diff --git a/ckan/config/solr/schema-1.4.xml b/ckan/config/solr/schema-1.4.xml deleted file mode 100644 index 98cbf378dbe..00000000000 --- 
a/ckan/config/solr/schema-1.4.xml +++ /dev/null @@ -1,190 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -index_id -text - - - - - - - - - - - - - - - - - - - - - diff --git a/ckan/config/solr/schema-2.0.xml b/ckan/config/solr/schema-2.0.xml deleted file mode 100644 index b306bb5d8c7..00000000000 --- a/ckan/config/solr/schema-2.0.xml +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -index_id -text - - - - - - - - - - - - - - - - - - - - - From 35893486d0791335ef437732e1c47b18ab43a85f Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 12:05:22 -0400 Subject: [PATCH 10/65] [#3192] add permission_labels to solr schema --- ckan/config/solr/schema.xml | 3 ++- ckan/lib/search/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ckan/config/solr/schema.xml b/ckan/config/solr/schema.xml index e8893f70ff9..4578c164b10 100644 --- a/ckan/config/solr/schema.xml +++ b/ckan/config/solr/schema.xml @@ -24,7 +24,7 @@ - + @@ -112,6 +112,7 @@ schema. In this case the version should be set to the next CKAN version number. 
+ diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 65f064093a2..ec30d8187c1 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -34,7 +34,7 @@ def text_traceback(): SIMPLE_SEARCH = asbool(config.get('ckan.simple_search', False)) -SUPPORTED_SCHEMA_VERSIONS = ['2.3'] +SUPPORTED_SCHEMA_VERSIONS = ['2.7'] DEFAULT_OPTIONS = { 'limit': 20, From 43cbe8fbff2374c06b98d9099774ecaea4ce095d Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 14:29:05 -0400 Subject: [PATCH 11/65] [#3192] store and use permission_labels in package_search --- ckan/lib/search/index.py | 6 ++++++ ckan/lib/search/query.py | 42 +++++++++++++++++++++++++++++---------- ckan/logic/action/get.py | 43 ++++++++++------------------------------ 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index 878a5fa3a8c..8eaea3847a6 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -279,6 +279,12 @@ def index_package(self, pkg_dict, defer_commit=False): assert pkg_dict, 'Plugin must return non empty package dict on index' + # permission labels determine visibility in search, can't be set + # in original dataset or before_index plugins + labels = lib_plugins.get_permission_labels() + pkg_dict['permission_labels'] = labels.get_dataset_labels( + model.Package.get(pkg_dict['id'])) + # send to solr: try: conn = make_connection() diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index c136b294ca4..6ec64588cab 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -280,12 +280,17 @@ def get_index(self,reference): return solr_response.docs[0] - def run(self, query): + def run(self, query, permission_labels=None, **kwargs): ''' Performs a dataset search using the given query. 
- @param query - dictionary with keys like: q, fq, sort, rows, facet - @return - dictionary with keys results and count + :param query: dictionary with keys like: q, fq, sort, rows, facet + :type query: dict + :param permission_labels: filter results to those that include at + least one of these labels. None to not filter (return everything) + :type permission_labels: list of unicode strings; or None + + :returns: dictionary with keys results and count May raise SearchQueryError or SearchError. ''' @@ -310,18 +315,23 @@ def run(self, query): rows_to_query = rows_to_return query['rows'] = rows_to_query + fq = [] + if 'fq' in query: + fq.append(query['fq']) + fq.extend(query.get('fq_list', [])) + # show only results from this CKAN instance - fq = query.get('fq', '') - if not '+site_id:' in fq: - fq += ' +site_id:"%s"' % config.get('ckan.site_id') + fq.append('+site_id:%s' % solr_literal(config.get('ckan.site_id'))) # filter for package status - if not '+state:' in fq: - fq += " +state:active" - query['fq'] = [fq] + if not '+state:' in query.get('fq', ''): + fq.append('+state:active') - fq_list = query.get('fq_list', []) - query['fq'].extend(fq_list) + # only return things we should be able to see + if permission_labels is not None: + fq.append('+permission_labels:(%s)' % ' OR '.join( + solr_literal(p) for p in permission_labels)) + query['fq'] = fq # faceting query['facet'] = query.get('facet', 'true') @@ -387,3 +397,13 @@ def run(self, query): self.facets[field] = dict(zip(values[0::2], values[1::2])) return {'results': self.results, 'count': self.count} + + +def solr_literal(t): + ''' + return a safe literal string for a solr query. Instead of escaping + each of + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / we're just dropping + double quotes -- this method currently only used by tokens like site_id + and permission labels. 
+ ''' + return u'"' + t.replace(u'"', u'') + u'"' diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 8d375e0f593..339d1d0e75c 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1834,45 +1834,24 @@ def package_search(context, data_dict): # Remove before these hit solr FIXME: whitelist instead include_private = asbool(data_dict.pop('include_private', False)) include_drafts = asbool(data_dict.pop('include_drafts', False)) - - capacity_fq = 'capacity:"public"' - if include_private and authz.is_sysadmin(user): - capacity_fq = None - elif include_private and user: - orgs = logic.get_action('organization_list_for_user')( - {'user': user}, {'permission': 'read'}) - if orgs: - capacity_fq = '({0} OR owner_org:({1}))'.format( - capacity_fq, - ' OR '.join(org['id'] for org in orgs)) - if include_drafts: - capacity_fq = '({0} OR creator_user_id:({1}))'.format( - capacity_fq, - authz.get_user_id_for_username(user)) - - if capacity_fq: + if not include_private: fq = ' '.join(p for p in fq.split() if 'capacity:' not in p) - data_dict['fq'] = fq + ' ' + capacity_fq - - fq = data_dict.get('fq', '') + data_dict['fq'] = fq + ' capacity:"public"' if include_drafts: - user_id = authz.get_user_id_for_username(user, allow_none=True) - if authz.is_sysadmin(user): - data_dict['fq'] = fq + ' +state:(active OR draft)' - elif user_id: - # Query to return all active datasets, and all draft datasets - # for this user. 
- data_dict['fq'] = fq + \ - ' ((creator_user_id:{0} AND +state:(draft OR active))' \ - ' OR state:active)'.format(user_id) - elif not authz.is_sysadmin(user): - data_dict['fq'] = fq + ' +state:active' + data_dict['fq'] += ' +state:(active OR draft)' # Pop these ones as Solr does not need them extras = data_dict.pop('extras', None) + # enforce permission filter based on user + if authz.is_sysadmin(user): + labels = None + else: + labels = lib_plugins.get_permission_labels( + ).get_user_dataset_labels(context['auth_user_obj']) + query = search.query_for(model.Package) - query.run(data_dict) + query.run(data_dict, permission_labels=labels) # Add them back so extensions can use them on after_search data_dict['extras'] = extras From 8d4559792c5669d27ef48bad803ab5cc35a95699 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 15:46:32 -0400 Subject: [PATCH 12/65] [#3192] need to check context ignore_auth --- ckan/logic/action/get.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 339d1d0e75c..a3deb4451c5 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1827,16 +1827,12 @@ def package_search(context, data_dict): # return a list of package ids data_dict['fl'] = 'id {0}'.format(data_source) - # we should remove any mention of capacity from the fq and - # instead set it to only retrieve public datasets - fq = data_dict.get('fq', '') - # Remove before these hit solr FIXME: whitelist instead include_private = asbool(data_dict.pop('include_private', False)) include_drafts = asbool(data_dict.pop('include_drafts', False)) + data_dict.setdefault('fq', '') if not include_private: - fq = ' '.join(p for p in fq.split() if 'capacity:' not in p) - data_dict['fq'] = fq + ' capacity:"public"' + data_dict['fq'] += ' +capacity:public' if include_drafts: data_dict['fq'] += ' +state:(active OR draft)' @@ -1844,7 +1840,7 @@ def package_search(context, data_dict): extras = 
data_dict.pop('extras', None) # enforce permission filter based on user - if authz.is_sysadmin(user): + if context.get('ignore_auth') or (user and authz.is_sysadmin(user)): labels = None else: labels = lib_plugins.get_permission_labels( From 14360ed0283f37b2b58693102782a8bcb1589106 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 16:24:54 -0400 Subject: [PATCH 13/65] [#3192] TestPackageSearchIndex-workaround --- ckan/lib/search/index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index 8eaea3847a6..937dbefeeff 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -282,8 +282,9 @@ def index_package(self, pkg_dict, defer_commit=False): # permission labels determine visibility in search, can't be set # in original dataset or before_index plugins labels = lib_plugins.get_permission_labels() + dataset = model.Package.get(pkg_dict['id']) pkg_dict['permission_labels'] = labels.get_dataset_labels( - model.Package.get(pkg_dict['id'])) + dataset) if dataset else [] # TestPackageSearchIndex-workaround # send to solr: try: From 666f3a1df31edc6324c7710f1f9822dcee834b48 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 17:34:41 -0400 Subject: [PATCH 14/65] [#3192] package_search with ignore_auth now==calling as sysadmin, update tests --- ckan/tests/logic/action/test_get.py | 62 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 4aa1051edee..b1e285d28de 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -961,10 +961,9 @@ def test_package_search_with_fq_excludes_private(self): factories.Dataset(user=user, private=True, owner_org=org['name']) fq = "capacity:private" - results = helpers.call_action('package_search', fq=fq)['results'] + results = logic.get_action('package_search')({}, 
{'fq':fq})['results'] - eq(len(results), 1) - eq(results[0]['name'], dataset['name']) + eq(len(results), 0) def test_package_search_with_fq_excludes_drafts(self): ''' @@ -997,7 +996,8 @@ def test_package_search_with_include_drafts_option_excludes_drafts_for_anon_user draft_dataset = factories.Dataset(user=user, state='draft') factories.Dataset(user=user, private=True, owner_org=org['name']) - results = helpers.call_action('package_search', include_drafts=True)['results'] + results = logic.get_action('package_search')( + {}, {'include_drafts':True})['results'] eq(len(results), 1) nose.tools.assert_not_equals(results[0]['name'], draft_dataset['name']) @@ -1018,8 +1018,8 @@ def test_package_search_with_include_drafts_option_includes_drafts_for_sysadmin( other_draft_dataset = factories.Dataset(user=other_user, state='draft') factories.Dataset(user=user, private=True, owner_org=org['name']) - results = helpers.call_action('package_search', include_drafts=True, - context={'user': sysadmin['name']})['results'] + results = logic.get_action('package_search')( + {'user': sysadmin['name']}, {'include_drafts': True})['results'] eq(len(results), 3) names = [r['name'] for r in results] @@ -1042,8 +1042,8 @@ def test_package_search_with_include_drafts_false_option_doesnot_include_drafts_ other_draft_dataset = factories.Dataset(user=other_user, state='draft') factories.Dataset(user=user, private=True, owner_org=org['name']) - results = helpers.call_action('package_search', include_drafts=False, - context={'user': sysadmin['name']})['results'] + results = logic.get_action('package_search')( + {'user': sysadmin['name']}, {'include_drafts':False})['results'] eq(len(results), 1) names = [r['name'] for r in results] @@ -1066,8 +1066,8 @@ def test_package_search_with_include_drafts_option_includes_drafts_for_user(self other_draft_dataset = factories.Dataset(user=other_user, state='draft', name="other-draft-dataset") factories.Dataset(user=user, private=True, owner_org=org['name'], 
name="private-dataset") - results = helpers.call_action('package_search', include_drafts=True, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, {'include_drafts':True})['results'] eq(len(results), 3) names = [r['name'] for r in results] @@ -1092,8 +1092,8 @@ def test_package_search_with_fq_for_create_user_id_will_include_datasets_for_oth factories.Dataset(user=user, private=True, owner_org=org['name'], name="private-dataset") fq = "creator_user_id:{0}".format(other_user['id']) - results = helpers.call_action('package_search', fq=fq, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, {'fq': fq})['results'] eq(len(results), 1) names = [r['name'] for r in results] @@ -1118,8 +1118,9 @@ def test_package_search_with_fq_for_create_user_id_will_not_include_drafts_for_o factories.Dataset(user=user, private=True, owner_org=org['name'], name="private-dataset") fq = "(creator_user_id:{0} AND +state:draft)".format(other_user['id']) - results = helpers.call_action('package_search', fq=fq, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, + {'fq': fq, 'include_drafts': True})['results'] eq(len(results), 0) @@ -1139,8 +1140,9 @@ def test_package_search_with_fq_for_creator_user_id_and_drafts_and_include_draft factories.Dataset(user=user, private=True, owner_org=org['name'], name="private-dataset") fq = "(creator_user_id:{0} AND +state:draft)".format(other_user['id']) - results = helpers.call_action('package_search', fq=fq, include_drafts=True, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, + {'fq': fq, 'include_drafts': True})['results'] eq(len(results), 0) @@ -1160,8 +1162,9 @@ def test_package_search_with_fq_for_creator_user_id_and_include_drafts_option_wi factories.Dataset(user=user, private=True, 
owner_org=org['name'], name="private-dataset") fq = "creator_user_id:{0}".format(other_user['id']) - results = helpers.call_action('package_search', fq=fq, include_drafts=True, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, + {'fq': fq, 'include_drafts': True})['results'] names = [r['name'] for r in results] eq(len(results), 1) @@ -1184,8 +1187,9 @@ def test_package_search_with_fq_for_create_user_id_will_include_drafts_for_other factories.Dataset(user=user, private=True, owner_org=org['name'], name="private-dataset") fq = "(creator_user_id:{0} AND +state:draft)".format(user['id']) - results = helpers.call_action('package_search', fq=fq, - context={'user': sysadmin['name']})['results'] + results = logic.get_action('package_search')( + {'user': sysadmin['name']}, + {'fq': fq})['results'] names = [r['name'] for r in results] eq(len(results), 1) @@ -1203,10 +1207,8 @@ def test_package_search_private_with_include_private(self): factories.Dataset(user=user, state='draft') private_dataset = factories.Dataset(user=user, private=True, owner_org=org['name']) - results = helpers.call_action( - 'package_search', - include_private=True, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, {'include_private': True})['results'] eq([r['name'] for r in results], [private_dataset['name']]) @@ -1217,10 +1219,8 @@ def test_package_search_private_with_include_private_wont_show_other_orgs_privat org2 = factories.Organization(user=user2) private_dataset = factories.Dataset(user=user2, private=True, owner_org=org2['name']) - results = helpers.call_action( - 'package_search', - include_private=True, - context={'user': user['name']})['results'] + results = logic.get_action('package_search')( + {'user': user['name']}, {'include_private': True})['results'] eq([r['name'] for r in results], []) @@ -1230,10 +1230,8 @@ def 
test_package_search_private_with_include_private_syadmin(self): org = factories.Organization(user=user) private_dataset = factories.Dataset(user=user, private=True, owner_org=org['name']) - results = helpers.call_action( - 'package_search', - include_private=True, - context={'user': sysadmin['name']})['results'] + results = logic.get_action('package_search')( + {'user': sysadmin['name']}, {'include_private': True})['results'] eq([r['name'] for r in results], [private_dataset['name']]) From 659b2b77041456226f4f6c49a4594a301e8aea5f Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sun, 21 Aug 2016 17:39:17 -0400 Subject: [PATCH 15/65] [#3192] pep8 --- ckan/tests/logic/action/test_get.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index b1e285d28de..51ea9ae41f4 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -961,7 +961,8 @@ def test_package_search_with_fq_excludes_private(self): factories.Dataset(user=user, private=True, owner_org=org['name']) fq = "capacity:private" - results = logic.get_action('package_search')({}, {'fq':fq})['results'] + results = logic.get_action('package_search')( + {}, {'fq': fq})['results'] eq(len(results), 0) @@ -997,7 +998,7 @@ def test_package_search_with_include_drafts_option_excludes_drafts_for_anon_user factories.Dataset(user=user, private=True, owner_org=org['name']) results = logic.get_action('package_search')( - {}, {'include_drafts':True})['results'] + {}, {'include_drafts': True})['results'] eq(len(results), 1) nose.tools.assert_not_equals(results[0]['name'], draft_dataset['name']) @@ -1043,7 +1044,7 @@ def test_package_search_with_include_drafts_false_option_doesnot_include_drafts_ factories.Dataset(user=user, private=True, owner_org=org['name']) results = logic.get_action('package_search')( - {'user': sysadmin['name']}, {'include_drafts':False})['results'] + {'user': sysadmin['name']}, 
{'include_drafts': False})['results'] eq(len(results), 1) names = [r['name'] for r in results] @@ -1067,7 +1068,7 @@ def test_package_search_with_include_drafts_option_includes_drafts_for_user(self factories.Dataset(user=user, private=True, owner_org=org['name'], name="private-dataset") results = logic.get_action('package_search')( - {'user': user['name']}, {'include_drafts':True})['results'] + {'user': user['name']}, {'include_drafts': True})['results'] eq(len(results), 3) names = [r['name'] for r in results] From d57d8be673f74eb53a8db8c193c57007eee4d0a6 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 22 Aug 2016 13:46:03 +0100 Subject: [PATCH 16/65] [#3194] Add is_flask_request function (from #3197) --- ckan/common.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ckan/common.py b/ckan/common.py index cfb7409004c..ac55e52a6e5 100644 --- a/ckan/common.py +++ b/ckan/common.py @@ -25,6 +25,22 @@ from sqlalchemy.util import OrderedDict +def is_flask_request(): + u''' + A centralized way to determine whether we are in the context of a + request being served by Flask or Pylons + ''' + try: + pylons.request.environ + pylons_request_available = True + except TypeError: + pylons_request_available = False + + return (flask.request and + (flask.request.environ.get(u'ckan.app') == u'flask_app' or + not pylons_request_available)) + + class CKANConfig(MutableMapping): u'''Main CKAN configuration object From 0884cdcaaf900d465723296cab666ba0f2bd30d2 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 22 Aug 2016 13:46:33 +0100 Subject: [PATCH 17/65] [#3194] Support both Flask and Pylons redirects --- ckan/lib/helpers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index ccccf43ff8c..dabe561101b 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -26,8 +26,9 @@ from markdown import markdown from bleach import clean as clean_html from pylons import url as 
_pylons_default_url -from ckan.common import config -from routes import redirect_to as _redirect_to +from ckan.common import config, is_flask_request +from flask import redirect as _flask_redirect +from routes import redirect_to as _routes_redirect_to from routes import url_for as _routes_default_url_for import i18n @@ -148,7 +149,10 @@ def redirect_to(*args, **kw): if _url.startswith('/'): _url = str(config['ckan.site_url'].rstrip('/') + _url) - return _redirect_to(_url) + if is_flask_request(): + return _flask_redirect(_url) + else: + return _routes_redirect_to(_url) @maintain.deprecated('h.url is deprecated please use h.url_for') From 0a8fd9987013964e05c1e0b56bcf16619bb97c28 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 15:53:32 -0400 Subject: [PATCH 18/65] [#3192] unicode literal pedantry --- ckan/lib/plugins.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ckan/lib/plugins.py b/ckan/lib/plugins.py index 0fbe63f53bb..dfb4df0c39f 100644 --- a/ckan/lib/plugins.py +++ b/ckan/lib/plugins.py @@ -591,7 +591,7 @@ class DefaultPermissionLabels(object): - users can read datasets belonging to their orgs "member-(org id)" ''' def get_dataset_labels(self, dataset_obj): - if dataset_obj.state == 'active' and not dataset_obj.private: + if dataset_obj.state == u'active' and not dataset_obj.private: return [u'public'] if dataset_obj.owner_org: @@ -606,7 +606,7 @@ def get_user_dataset_labels(self, user_obj): labels.append(u'creator-%s' % user_obj.id) - orgs = logic.get_action('organization_list_for_user')( - {'user': user_obj.id}, {'permission': 'read'}) - labels.extend(u'member-%s' % o['id'] for o in orgs) + orgs = logic.get_action(u'organization_list_for_user')( + {u'user': user_obj.id}, {u'permission': u'read'}) + labels.extend(u'member-%s' % o[u'id'] for o in orgs) return labels From 5f58296b0da54532d1dedb2f50b92442ae4f772c Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 15:54:30 -0400 Subject: [PATCH 19/65] 
[#3192] example_ipermissionlabels extension --- ckanext/example_ipermissionlabels/__init__.py | 0 ckanext/example_ipermissionlabels/plugin.py | 38 +++++++++++++++++++ setup.py | 1 + 3 files changed, 39 insertions(+) create mode 100644 ckanext/example_ipermissionlabels/__init__.py create mode 100644 ckanext/example_ipermissionlabels/plugin.py diff --git a/ckanext/example_ipermissionlabels/__init__.py b/ckanext/example_ipermissionlabels/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ckanext/example_ipermissionlabels/plugin.py b/ckanext/example_ipermissionlabels/plugin.py new file mode 100644 index 00000000000..4315da4de8e --- /dev/null +++ b/ckanext/example_ipermissionlabels/plugin.py @@ -0,0 +1,38 @@ +# encoding: utf-8 + +from ckan import plugins +from ckan.lib.plugins import DefaultPermissionLabels + +class ExampleIPermissionLabelsPlugin( + plugins.SingletonPlugin, DefaultPermissionLabels): + u''' + Example permission labels plugin that makes datasets whose + notes field starts with "Proposed:" visible only to their + creator and Admin users in the organization assigned to the + dataset. 
+ ''' + plugins.implements(plugins.IPermissionLabels) + + def get_dataset_labels(self, dataset_obj): + u''' + Use creator-*, admin-* labels for proposed datasets + ''' + if dataset_obj.notes.startswith(u'Proposed:'): + labels = [u'creator-%s' % dataset_obj.creator_user_id] + if dataset_obj.owner_org: + return labels + [u'admin-%s' % dataset_obj.owner_org] + return labels + + return super(ExampleIPermissionLabelsPlugin, self).get_dataset_labels( + dataset_obj) + + def get_user_dataset_labels(self, user_obj): + u''' + Include admin-* labels for users in addition to default labels + creator-*, member-* and public + ''' + labels = super(ExampleIPermissionLabelsPlugin, self + ).get_user_dataset_labels(user_obj) + orgs = logic.get_action(u'organization_list_for_user')( + {u'user': user_obj.id}, {u'permission': u'admin'}) + return labels + [u'admin-%s' % o['id'] for o in orgs] diff --git a/setup.py b/setup.py index a07f9aff729..daf6d8bd86f 100644 --- a/setup.py +++ b/setup.py @@ -142,6 +142,7 @@ 'example_iconfigurer_v1 = ckanext.example_iconfigurer.plugin_v1:ExampleIConfigurerPlugin', 'example_iconfigurer_v2 = ckanext.example_iconfigurer.plugin_v2:ExampleIConfigurerPlugin', 'example_iuploader = ckanext.example_iuploader.plugin:ExampleIUploader', + 'example_ipermissionlabels = ckanext.example_ipermissionlabels:ExampleIPermissionLabels', ], 'ckan.system_plugins': [ 'domain_object_mods = ckan.model.modification:DomainObjectModificationExtension', From 64e8d08717ab5b500f989a6ced2fc7dbf42f7be1 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 15:56:13 -0400 Subject: [PATCH 20/65] [#3192] example_ipermissionlabels tests --- .../tests/__init__.py | 0 .../tests/test_example_ipermissionlabels.py | 89 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 ckanext/example_ipermissionlabels/tests/__init__.py create mode 100644 ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py diff --git 
a/ckanext/example_ipermissionlabels/tests/__init__.py b/ckanext/example_ipermissionlabels/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py new file mode 100644 index 00000000000..894c322fc56 --- /dev/null +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -0,0 +1,89 @@ +# encoding: utf-8 + +''' +Tests for the ckanext.example_ipermissionlabels extension +''' +from nose.tools import assert_raises, assert_equal + +import ckan.plugins +from ckan.plugins.toolkit import get_action, NotAuthorized +from ckan.tests.helpers import FunctionalTestBase, call_action, call_auth +from ckan.tests import factories + +class TestExampleIPermissionLabels(FunctionalTestBase): + @classmethod + def setup_class(cls): + # Test code should use CKAN's plugins.load() function to load plugins + # to be tested. 
+ ckan.plugins.load('example_ipermissionlabels') + + @classmethod + def teardown_class(cls): + ckan.plugins.unload('example_ipermissionlabels') + + def test_normal_dataset_permissions_are_normal(self): + user = factories.User() + user2 = factories.User() + user3 = factories.User() + org = factories.Organization(user=user) + org2 = factories.Organization(user=user2) + call_action( + 'organization_member_create', None, username=user3['id'], + id=org2['id'], role='member') + + dataset = factories.Dataset( + name='d1', user=user, private=True, owner_org=org['id']) + dataset2 = factories.Dataset( + name='d2', user=user2, private=True, owner_org=org2['id']) + + results = get_action('package_search')( + {'user': user['id']}, {})['results'] + names = [r['name'] for r in results] + assert_equal(names, ['d1']) + + results = get_action('package_search')( + {'user': user3['id']}, {})['results'] + names = [r['name'] for r in results] + assert_equal(names, ['d2']) + + def test_proposed_overrides_public(self): + user = factories.User() + dataset = factories.Dataset( + name='d1', notes='Proposed:', user=user) + + results = get_action('package_search')({}, {})['results'] + names = [r['name'] for r in results] + assert_equal(names, []) + + assert_raises(NotAuthorized, call_auth, + 'package_show', {'user':'', 'model': model}, id='d1') + + def test_proposed_dataset_visible_to_creator(self): + user = factories.User() + dataset = factories.Dataset( + name='d1', notes='Proposed:', user=user) + + results = get_action('package_search')( + {'user': user['id']}, {})['results'] + names = [r['name'] for r in results] + assert_equal(names, ['d1']) + + ret = call_auth('package_show', + {'user': user['id'], 'model': model}, id='d1') + assert ret['success'], ret + + def test_proposed_dataset_visible_to_org_admin(self): + user = factories.User() + user2 = factories.User() + org = factories.Organization(user=user2) + dataset = factories.Dataset( + name='d1', notes='Proposed:', user=user, 
owner_org=org['id']) + + results = get_action('package_search')( + {'user': user2['id']}, {})['results'] + names = [r['name'] for r in results] + assert_equal(names, ['d1']) + + ret = call_auth('package_show', + {'user': user2['id'], 'model': model}, id='d1') + assert ret['success'], ret From c182b234d47d073a8a4448d230d8efb76e5059ac Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 16:54:50 -0400 Subject: [PATCH 21/65] [#3192] passing tests --- ckanext/example_ipermissionlabels/plugin.py | 9 ++++--- .../tests/test_example_ipermissionlabels.py | 25 +++++++++++-------- setup.py | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/ckanext/example_ipermissionlabels/plugin.py b/ckanext/example_ipermissionlabels/plugin.py index 4315da4de8e..0ca1a19a421 100644 --- a/ckanext/example_ipermissionlabels/plugin.py +++ b/ckanext/example_ipermissionlabels/plugin.py @@ -2,6 +2,7 @@ from ckan import plugins from ckan.lib.plugins import DefaultPermissionLabels +from ckan.plugins.toolkit import get_action class ExampleIPermissionLabelsPlugin( plugins.SingletonPlugin, DefaultPermissionLabels): @@ -33,6 +34,8 @@ def get_user_dataset_labels(self, user_obj): ''' labels = super(ExampleIPermissionLabelsPlugin, self ).get_user_dataset_labels(user_obj) - orgs = logic.get_action(u'organization_list_for_user')( - {u'user': user_obj.id}, {u'permission': u'admin'}) - return labels + [u'admin-%s' % o['id'] for o in orgs] + if user_obj: + orgs = get_action(u'organization_list_for_user')( + {u'user': user_obj.id}, {u'permission': u'admin'}) + labels.extend(u'admin-%s' % o['id'] for o in orgs) + return labels diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index 894c322fc56..df0ec3fc6ea 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ 
-9,6 +9,7 @@ from ckan.plugins.toolkit import get_action, NotAuthorized from ckan.tests.helpers import FunctionalTestBase, call_action, call_auth from ckan.tests import factories +from ckan import model class TestExampleIPermissionLabels(FunctionalTestBase): @classmethod @@ -28,7 +29,7 @@ def test_normal_dataset_permissions_are_normal(self): org = factories.Organization(user=user) org2 = factories.Organization(user=user2) call_action( - 'organization_member_create', None, username=user3['id'], + 'organization_member_create', None, username=user3['name'], id=org2['id'], role='member') dataset = factories.Dataset( @@ -37,12 +38,12 @@ def test_normal_dataset_permissions_are_normal(self): name='d2', user=user2, private=True, owner_org=org2['id']) results = get_action('package_search')( - {'user': user['id']}, {})['results'] + {'user': user['name']}, {'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, ['d1']) results = get_action('package_search')( - {'user': user3['id']}, {})['results'] + {'user': user3['name']}, {'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, ['d2']) @@ -51,7 +52,8 @@ def test_proposed_overrides_public(self): dataset = factories.Dataset( name='d1', notes='Proposed:', user=user) - results = get_action('package_search')({}, {})['results'] + results = get_action('package_search')( + {}, {'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, []) @@ -64,26 +66,29 @@ def test_proposed_dataset_visible_to_creator(self): name='d1', notes='Proposed:', user=user) results = get_action('package_search')( - {'user': user['id']}, {})['results'] + {'user': user['name']}, {'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, ['d1']) ret = call_auth('package_show', - {'user': user['id'], 'model': model}, id='d1') - assert ret['success'], ret + {'user': user['name'], 'model': model}, id='d1') + assert 
ret def test_proposed_dataset_visible_to_org_admin(self): user = factories.User() user2 = factories.User() org = factories.Organization(user=user2) + call_action( + 'organization_member_create', None, username=user['name'], + id=org['id'], role='editor') dataset = factories.Dataset( name='d1', notes='Proposed:', user=user, owner_org=org['id']) results = get_action('package_search')( - {'user': user2['id']}, {})['results'] + {'user': user2['name']}, {'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, ['d1']) ret = call_auth('package_show', - {'user': user2['id'], 'model': model}, id='d1') - assert ret['success'], ret + {'user': user2['name'], 'model': model}, id='d1') + assert ret diff --git a/setup.py b/setup.py index daf6d8bd86f..b8aa1c7ae1d 100644 --- a/setup.py +++ b/setup.py @@ -142,7 +142,7 @@ 'example_iconfigurer_v1 = ckanext.example_iconfigurer.plugin_v1:ExampleIConfigurerPlugin', 'example_iconfigurer_v2 = ckanext.example_iconfigurer.plugin_v2:ExampleIConfigurerPlugin', 'example_iuploader = ckanext.example_iuploader.plugin:ExampleIUploader', - 'example_ipermissionlabels = ckanext.example_ipermissionlabels:ExampleIPermissionLabels', + 'example_ipermissionlabels = ckanext.example_ipermissionlabels.plugin:ExampleIPermissionLabelsPlugin', ], 'ckan.system_plugins': [ 'domain_object_mods = ckan.model.modification:DomainObjectModificationExtension', From 20354cfc083ad9fe4caa6809b728b65f184b6d07 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 16:57:13 -0400 Subject: [PATCH 22/65] [#3192] one more test --- .../tests/test_example_ipermissionlabels.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index df0ec3fc6ea..ea48ea14d17 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ 
b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -92,3 +92,21 @@ def test_proposed_dataset_visible_to_org_admin(self): ret = call_auth('package_show', {'user': user2['name'], 'model': model}, id='d1') assert ret + + def test_proposed_dataset_invisible_to_another_editor(self): + user = factories.User() + user2 = factories.User() + org = factories.Organization(user=user2) + call_action( + 'organization_member_create', None, username=user['name'], + id=org['id'], role='editor') + dataset = factories.Dataset( + name='d1', notes='Proposed:', user=user2, owner_org=org['id']) + + results = get_action('package_search')( + {'user': user['name']}, {'include_private': True})['results'] + names = [r['name'] for r in results] + assert_equal(names, []) + + assert_raises(NotAuthorized, call_auth, + 'package_show', {'user':user['name'], 'model': model}, id='d1') From 68cccd248fc34a3fba1da23e4576921b67d80ad4 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 17:01:18 -0400 Subject: [PATCH 23/65] [#3192] pep8 --- ckanext/example_ipermissionlabels/plugin.py | 3 ++- .../tests/test_example_ipermissionlabels.py | 21 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/ckanext/example_ipermissionlabels/plugin.py b/ckanext/example_ipermissionlabels/plugin.py index 0ca1a19a421..7987fe85b5b 100644 --- a/ckanext/example_ipermissionlabels/plugin.py +++ b/ckanext/example_ipermissionlabels/plugin.py @@ -4,6 +4,7 @@ from ckan.lib.plugins import DefaultPermissionLabels from ckan.plugins.toolkit import get_action + class ExampleIPermissionLabelsPlugin( plugins.SingletonPlugin, DefaultPermissionLabels): u''' @@ -33,7 +34,7 @@ def get_user_dataset_labels(self, user_obj): creator-*, member-* and public ''' labels = super(ExampleIPermissionLabelsPlugin, self - ).get_user_dataset_labels(user_obj) + ).get_user_dataset_labels(user_obj) if user_obj: orgs = get_action(u'organization_list_for_user')( {u'user': user_obj.id}, 
{u'permission': u'admin'}) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index ea48ea14d17..b3a74f6aefb 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -11,6 +11,7 @@ from ckan.tests import factories from ckan import model + class TestExampleIPermissionLabels(FunctionalTestBase): @classmethod def setup_class(cls): @@ -57,8 +58,9 @@ def test_proposed_overrides_public(self): names = [r['name'] for r in results] assert_equal(names, []) - assert_raises(NotAuthorized, call_auth, - 'package_show', {'user':'', 'model': model}, id='d1') + assert_raises( + NotAuthorized, call_auth, 'package_show', + {'user': '', 'model': model}, id='d1') def test_proposed_dataset_visible_to_creator(self): user = factories.User() @@ -70,8 +72,8 @@ def test_proposed_dataset_visible_to_creator(self): names = [r['name'] for r in results] assert_equal(names, ['d1']) - ret = call_auth('package_show', - {'user': user['name'], 'model': model}, id='d1') + ret = call_auth( + 'package_show', {'user': user['name'], 'model': model}, id='d1') assert ret def test_proposed_dataset_visible_to_org_admin(self): @@ -89,10 +91,10 @@ def test_proposed_dataset_visible_to_org_admin(self): names = [r['name'] for r in results] assert_equal(names, ['d1']) - ret = call_auth('package_show', - {'user': user2['name'], 'model': model}, id='d1') + ret = call_auth( + 'package_show', {'user': user2['name'], 'model': model}, id='d1') assert ret - + def test_proposed_dataset_invisible_to_another_editor(self): user = factories.User() user2 = factories.User() @@ -108,5 +110,6 @@ def test_proposed_dataset_invisible_to_another_editor(self): names = [r['name'] for r in results] assert_equal(names, []) - assert_raises(NotAuthorized, call_auth, - 'package_show', {'user':user['name'], 'model': 
model}, id='d1') + assert_raises( + NotAuthorized, call_auth, 'package_show', + {'user': user['name'], 'model': model}, id='d1') From 4335fc18130bd5051b84e7d220175a6604c4aac1 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 17:07:18 -0400 Subject: [PATCH 24/65] [#3192] IPermissionLabels docstring: mention example plugin --- ckan/plugins/interfaces.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 71d9aed8570..aa4c3c4fce4 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -1578,8 +1578,10 @@ class IPermissionLabels(Interface): visible to each user. Implementations might want to consider mixing in - ckan.lib.plugins.DefaultPermissionLabels which provides + ``ckan.lib.plugins.DefaultPermissionLabels`` which provides default behaviours for these methods. + + See ``ckanext/example_ipermissionlabels`` for an example plugin. ''' def get_dataset_labels(self, dataset_obj): From 1f0a84221a16ad1b7f0e164a9efa6f7a487c3929 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 17:58:21 -0400 Subject: [PATCH 25/65] [#3192] test_string_literals_are_prefixed: relax about data_dict['id'] --- ckan/tests/test_coding_standards.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ckan/tests/test_coding_standards.py b/ckan/tests/test_coding_standards.py index d5129476756..679ca764859 100644 --- a/ckan/tests/test_coding_standards.py +++ b/ckan/tests/test_coding_standards.py @@ -219,8 +219,10 @@ def find_unprefixed_string_literals(filename): break except ValueError: continue - first_char = lines[lineno][col_offset] - if first_char not in u'ub': # Don't allow capital U and B either + leading = lines[lineno][col_offset - 1:col_offset + 1] + if leading[:-1] == u'[': # data['id'] is unambiguous, ignore these + continue + if leading[-1:] not in u'ub': # Don't allow capital U and B either problems.append((lineno + 1, col_offset + 1)) return 
sorted(problems) From f2c3c84462fab95d0b4010088aeebcbcef6c3b87 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 18:00:10 -0400 Subject: [PATCH 26/65] [#3192] unicode literal pedantry II: Pedantic Boogaloo --- .../tests/test_example_ipermissionlabels.py | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index b3a74f6aefb..9454b614a10 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -17,11 +17,11 @@ class TestExampleIPermissionLabels(FunctionalTestBase): def setup_class(cls): # Test code should use CKAN's plugins.load() function to load plugins # to be tested. - ckan.plugins.load('example_ipermissionlabels') + ckan.plugins.load(u'example_ipermissionlabels') @classmethod def teardown_class(cls): - ckan.plugins.unload('example_ipermissionlabels') + ckan.plugins.unload(u'example_ipermissionlabels') def test_normal_dataset_permissions_are_normal(self): user = factories.User() @@ -30,50 +30,50 @@ def test_normal_dataset_permissions_are_normal(self): org = factories.Organization(user=user) org2 = factories.Organization(user=user2) call_action( - 'organization_member_create', None, username=user3['name'], - id=org2['id'], role='member') + u'organization_member_create', None, username=user3['name'], + id=org2['id'], role=u'member') dataset = factories.Dataset( - name='d1', user=user, private=True, owner_org=org['id']) + name=u'd1', user=user, private=True, owner_org=org['id']) dataset2 = factories.Dataset( - name='d2', user=user2, private=True, owner_org=org2['id']) + name=u'd2', user=user2, private=True, owner_org=org2['id']) - results = get_action('package_search')( - {'user': user['name']}, {'include_private': True})['results'] + results = 
get_action(u'package_search')( + {u'user': user['name']}, {u'include_private': True})['results'] names = [r['name'] for r in results] - assert_equal(names, ['d1']) + assert_equal(names, [u'd1']) - results = get_action('package_search')( - {'user': user3['name']}, {'include_private': True})['results'] + results = get_action(u'package_search')( + {u'user': user3['name']}, {u'include_private': True})['results'] names = [r['name'] for r in results] - assert_equal(names, ['d2']) + assert_equal(names, [u'd2']) def test_proposed_overrides_public(self): user = factories.User() dataset = factories.Dataset( - name='d1', notes='Proposed:', user=user) + name=u'd1', notes=u'Proposed:', user=user) - results = get_action('package_search')( - {}, {'include_private': True})['results'] + results = get_action(u'package_search')( + {}, {u'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, []) assert_raises( - NotAuthorized, call_auth, 'package_show', - {'user': '', 'model': model}, id='d1') + NotAuthorized, call_auth, u'package_show', + {u'user': u'', u'model': model}, id=u'd1') def test_proposed_dataset_visible_to_creator(self): user = factories.User() dataset = factories.Dataset( - name='d1', notes='Proposed:', user=user) + name=u'd1', notes=u'Proposed:', user=user) - results = get_action('package_search')( - {'user': user['name']}, {'include_private': True})['results'] + results = get_action(u'package_search')( + {u'user': user['name']}, {u'include_private': True})['results'] names = [r['name'] for r in results] - assert_equal(names, ['d1']) + assert_equal(names, [u'd1']) - ret = call_auth( - 'package_show', {'user': user['name'], 'model': model}, id='d1') + ret = call_auth(u'package_show', + {u'user': user['name'], u'model': model}, id=u'd1') assert ret def test_proposed_dataset_visible_to_org_admin(self): @@ -81,18 +81,18 @@ def test_proposed_dataset_visible_to_org_admin(self): user2 = factories.User() org = 
factories.Organization(user=user2) call_action( - 'organization_member_create', None, username=user['name'], - id=org['id'], role='editor') + u'organization_member_create', None, username=user['name'], + id=org['id'], role=u'editor') dataset = factories.Dataset( - name='d1', notes='Proposed:', user=user, owner_org=org['id']) + name=u'd1', notes=u'Proposed:', user=user, owner_org=org['id']) - results = get_action('package_search')( - {'user': user2['name']}, {'include_private': True})['results'] + results = get_action(u'package_search')( + {u'user': user2[u'name']}, {u'include_private': True})['results'] names = [r['name'] for r in results] - assert_equal(names, ['d1']) + assert_equal(names, [u'd1']) - ret = call_auth( - 'package_show', {'user': user2['name'], 'model': model}, id='d1') + ret = call_auth(u'package_show', + {u'user': user2['name'], u'model': model}, id=u'd1') assert ret def test_proposed_dataset_invisible_to_another_editor(self): @@ -100,16 +100,16 @@ def test_proposed_dataset_invisible_to_another_editor(self): user2 = factories.User() org = factories.Organization(user=user2) call_action( - 'organization_member_create', None, username=user['name'], - id=org['id'], role='editor') + u'organization_member_create', None, username=user['name'], + id=org['id'], role=u'editor') dataset = factories.Dataset( - name='d1', notes='Proposed:', user=user2, owner_org=org['id']) + name=u'd1', notes=u'Proposed:', user=user2, owner_org=org['id']) - results = get_action('package_search')( - {'user': user['name']}, {'include_private': True})['results'] + results = get_action(u'package_search')( + {u'user': user['name']}, {u'include_private': True})['results'] names = [r['name'] for r in results] assert_equal(names, []) assert_raises( - NotAuthorized, call_auth, 'package_show', - {'user': user['name'], 'model': model}, id='d1') + NotAuthorized, call_auth, u'package_show', + {u'user': user['name'], u'model': model}, id=u'd1') From c94c26804b55ca0a1f07ad6600692b067c509599 
Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Sat, 27 Aug 2016 18:10:23 -0400 Subject: [PATCH 27/65] [#3192] pep8 --- .../tests/test_example_ipermissionlabels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index 9454b614a10..de96d08321e 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -91,7 +91,7 @@ def test_proposed_dataset_visible_to_org_admin(self): names = [r['name'] for r in results] assert_equal(names, [u'd1']) - ret = call_auth(u'package_show', + ret = call_auth(u'package_show', {u'user': user2['name'], u'model': model}, id=u'd1') assert ret From cc46efa33e5ee1fabe50ae734ce458073d508528 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Thu, 14 Jul 2016 10:13:24 +0200 Subject: [PATCH 28/65] [#2977] Basic implementation of background jobs via python-rq. Adds a basic support of background jobs using python-rq. Jobs can be managed via the `ckan.lib.jobs` module, the `job_*` API functions and the `jobs` paster command. --- ckan/lib/cli.py | 143 ++++++++++++++++++++++++++---------- ckan/lib/jobs.py | 137 ++++++++++++++++++++++++++++++++++ ckan/logic/action/delete.py | 32 ++++++++ ckan/logic/action/get.py | 27 +++++++ ckan/logic/auth/delete.py | 10 +++ ckan/logic/auth/get.py | 10 +++ ckan/websetup.py | 4 +- requirements.in | 1 + requirements.txt | 47 ++++++------ setup.py | 1 + 10 files changed, 351 insertions(+), 61 deletions(-) create mode 100644 ckan/lib/jobs.py diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 5752edd80e1..83d05989d36 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -43,6 +43,16 @@ def deprecation_warning(message=None): sys.stderr.write(u'\n') +def error(msg): + ''' + Print an error message to STDOUT and exit with return code 1. 
+ ''' + sys.stderr.write(msg) + if not msg.endswith('\n'): + sys.stderr.write('\n') + sys.exit(1) + + def parse_db_config(config_key='sqlalchemy.url'): ''' Takes a config key for a database connection url and parses it into a dictionary. Expects a url like: @@ -257,8 +267,7 @@ def command(self): elif cmd == 'migrate-filestore': self.migrate_filestore() else: - print 'Command %s not recognized' % cmd - sys.exit(1) + error('Command %s not recognized' % cmd) def _get_db_config(self): return parse_db_config() @@ -610,9 +619,8 @@ def export_datasets(self, out_folder): r = urllib2.urlopen(url).read() except urllib2.HTTPError, e: if e.code == 404: - print ('Please install ckanext-dcat and enable the ' + - '`dcat` plugin to use the RDF serializations') - sys.exit(1) + error('Please install ckanext-dcat and enable the ' + + '`dcat` plugin to use the RDF serializations') with open(fname, 'wb') as f: f.write(r) except IOError, ioe: @@ -801,16 +809,14 @@ def password_prompt(cls): password1 = getpass.getpass('Password: ') password2 = getpass.getpass('Confirm password: ') if password1 != password2: - print 'Passwords do not match' - sys.exit(1) + error('Passwords do not match') return password1 def add(self): import ckan.model as model if len(self.args) < 2: - print 'Need name of the user.' 
- sys.exit(1) + error('Need name of the user.') username = self.args[1] # parse args into data_dict @@ -842,8 +848,7 @@ def add(self): user_dict = logic.get_action('user_create')(context, data_dict) pprint(user_dict) except logic.ValidationError, e: - print e - sys.exit(1) + error(e) def remove(self): import ckan.model as model @@ -965,8 +970,7 @@ def command(self): elif cmd == 'clean': self.clean() else: - print 'Command %s not recognized' % cmd - sys.exit(1) + error('Command %s not recognized' % cmd) def run_(self): default_ini = os.path.join(os.getcwd(), 'development.ini') @@ -976,8 +980,7 @@ def run_(self): elif os.path.isfile(default_ini): os.environ['CKAN_CONFIG'] = default_ini else: - print 'No .ini specified and none was found in current directory' - sys.exit(1) + error('No .ini specified and none was found in current directory') from ckan.lib.celery_app import celery celery_args = [] @@ -1013,8 +1016,7 @@ def clean(self): print '%i of %i tasks deleted' % (tasks_initially - tasks_afterwards, tasks_initially) if tasks_afterwards: - print 'ERROR: Failed to delete all tasks' - sys.exit(1) + error('Failed to delete all tasks') model.repo.commit_and_remove() @@ -1094,15 +1096,13 @@ def command(self): self.update_all(engine, start_date) elif cmd == 'export': if len(self.args) <= 1: - print self.__class__.__doc__ - sys.exit(1) + error(self.__class__.__doc__) output_file = self.args[1] start_date = self.args[2] if len(self.args) > 2 else None self.update_all(engine, start_date) self.export_tracking(engine, output_file) else: - print self.__class__.__doc__ - sys.exit(1) + error(self.__class__.__doc__) def update_all(self, engine, start_date=None): if start_date: @@ -2250,11 +2250,9 @@ def _get_view_plugins(self, view_plugin_types, set(loaded_view_plugins)) if plugins_not_found: - msg = ('View plugin(s) not found : {0}. 
'.format(plugins_not_found) - + 'Have they been added to the `ckan.plugins` configuration' - + ' option?') - log.error(msg) - sys.exit(1) + error('View plugin(s) not found : {0}. '.format(plugins_not_found) + + 'Have they been added to the `ckan.plugins` configuration' + + ' option?') return loaded_view_plugins @@ -2338,8 +2336,7 @@ def _update_search_params(self, search_data_dict): try: user_search_params = json.loads(self.options.search_params) except ValueError, e: - log.error('Unable to parse JSON search parameters: {0}'.format(e)) - sys.exit(1) + error('Unable to parse JSON search parameters: {0}'.format(e)) if user_search_params.get('q'): search_data_dict['q'] = user_search_params['q'] @@ -2413,8 +2410,7 @@ def create_views(self, view_plugin_types=[]): query = self._search_datasets(page, loaded_view_plugins) if page == 1 and query['count'] == 0: - log.info('No datasets to create resource views on, exiting...') - sys.exit(1) + error('No datasets to create resource views on, exiting...') elif page == 1 and not self.options.assume_yes: @@ -2426,8 +2422,7 @@ def create_views(self, view_plugin_types=[]): loaded_view_plugins)) if confirm == 'no': - log.info('Command aborted by user') - sys.exit(1) + error('Command aborted by user') if query['results']: for dataset_dict in query['results']: @@ -2472,8 +2467,7 @@ def clear_views(self, view_plugin_types=[]): result = query_yes_no(msg, default='no') if result == 'no': - log.info('Command aborted by user') - sys.exit(1) + error('Command aborted by user') context = {'user': self.site_user['name']} logic.get_action('resource_view_clear')( @@ -2551,15 +2545,88 @@ def command(self): if options: for option in options: if '=' not in option: - sys.stderr.write( + error( 'An option does not have an equals sign: %r ' 'It should be \'key=value\'. 
If there are spaces ' 'you\'ll need to quote the option.\n' % option) - sys.exit(1) try: config_tool.config_edit_using_option_strings( config_filepath, options, self.options.section, edit=self.options.edit) except config_tool.ConfigToolError, e: - sys.stderr.write(e.message) - sys.exit(1) + error(e) + + +class JobsCommand(CkanCommand): + '''Manage background jobs + + Usage: + + paster jobs worker - Start a worker that fetches jobs + from the queue and executes them + paster jobs list - List currently enqueued jobs + paster jobs cancel - Cancel a specific job + paster jobs clear - Cancel all jobs + paster jobs test - Enqueue a test job + + See also the `worker` command for running workers that fetch + and execute jobs. + ''' + + summary = __doc__.split(u'\n')[0] + usage = __doc__ + min_args = 0 + + def command(self): + self._load_config() + from ckan.lib.jobs import init_queue + self.queue = init_queue() + + if not self.args: + print(self.__doc__) + sys.exit(0) + + cmd = self.args.pop(0) + if cmd == 'worker': + self.worker() + elif cmd == u'list': + self.list() + elif cmd == u'cancel': + self.cancel() + elif cmd == u'clear': + self.clear() + elif cmd == u'test': + self.test() + else: + error(u'Unknown command "{}"'.format(cmd)) + + def worker(self): + from ckan.lib.jobs import init_queue, Worker + worker = Worker(self.queue) + worker.work() + + def list(self): + jobs = p.toolkit.get_action('job_list')({}, {}) + for job in jobs: + if job['title'] is None: + job['title'] = '' + print('{created} {id} {title}'.format(**job)) + + def cancel(self): + if not self.args: + error('You must specify a job ID') + id = self.args[0] + try: + p.toolkit.get_action('job_cancel')({}, {'id': id}) + except logic.NotFound: + error('There is no job with ID "{}"'.format(id)) + print('Cancelled job {}'.format(id)) + + def clear(self): + p.toolkit.get_action('job_clear')({}, {}) + print('Cleared the job queue') + + def test(self): + from ckan.lib.jobs import enqueue, test_job + job = 
enqueue(test_job, ['A test job'], title='A test job') + print('Enqueued test job {}'.format(job.id)) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py new file mode 100644 index 00000000000..451766b849b --- /dev/null +++ b/ckan/lib/jobs.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# encoding: utf-8 + +u''' +Asynchronous background jobs. + +Note that most job management functions are not available from this +module but via the various ``job_*`` API functions. +''' + +import logging + +from pylons import config +from redis import Redis +from rq import Queue, Worker as RqWorker +from rq.connections import push_connection + + +log = logging.getLogger(__name__) + +REDIS_URL_SETTING_NAME = u'ckan.jobs.redis_url' + +REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' + +queue = None + + +def init_queue(): + u''' + Initialize the job queue. + + :returns: The queue. + :rtype: ``rq.queue.Queue`` + ''' + global queue + if queue is not None: + return + redis_url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) + log.warn(u'Initializing background job queue at {}'.format(redis_url)) + redis_conn = Redis.from_url(redis_url) + redis_conn.ping() # Force connection check + queue = Queue(connection=redis_conn) + push_connection(redis_conn) # See https://github.com/nvie/rq/issues/479 + return queue + + +def enqueue(fn, args=None, title=None): + u''' + Enqueue a job to be run in the background. + + :param function fn: Function to be executed in the background + + :param list args: List of arguments to be passed to the function. + Pass an empty list if there are no arguments (default). + + :param string title: Optional human-readable title of the job. + + :returns: The enqueued job. 
+ :rtype: ``rq.job.Job`` + ''' + if args is None: + args = [] + job = queue.enqueue_call(func=fn, args=args) + job.meta[u'title'] = title + job.save() + if title: + msg = u'Enqueued background job "{}" ({})'.format(title, job.id) + else: + msg = u'Enqueued background job {}'.format(job.id) + log.info(msg) + return job + + +def from_id(id): + u''' + Look up an enqueued job by its ID. + + :param string id: The ID of the job. + + :returns: The job. + :rtype: ``rq.job.Job`` + + :raises KeyError: if no job with that ID exists. + ''' + for job in queue.jobs: + if job.id == id: + return job + raise KeyError('No such job: "{}"'.format(id)) + + +def dictize_job(job): + u'''Convert a job to a dict. + + In contrast to ``rq.job.Job.to_dict`` this function includes only + the attributes that are relevant to our use case and promotes the + meta attributes that we use (e.g. ``title``). + + :param rq.job.Job job: The job to dictize. + + :returns: The dictized job. + :rtype: dict + ''' + return { + 'id': job.id, + 'title': job.meta.get('title'), + 'created': job.created_at.isoformat(), + } + + +def test_job(*args): + u'''Test job. + + A test job for debugging purposes. Prints out any arguments it + receives. + ''' + print(args) + + +class Worker(RqWorker): + u''' + Custom worker with CKAN-specific logging. 
+ ''' + def register_birth(self, *args, **kwargs): + result = super(Worker, self).register_birth(*args, **kwargs) + log.info('Worker {} has started (PID: {})'.format(self.key, self.pid)) + return result + + def execute_job(self, job, *args, **kwargs): + log.info('Worker {} starts to execute job {}'.format(self.key, job.id)) + result = super(Worker, self).execute_job(job, *args, **kwargs) + log.info('Worker {} has finished executing job {}'.format(self.key, job.id)) + return result + + def register_death(self, *args, **kwargs): + result = super(Worker, self).register_death(*args, **kwargs) + log.info('Worker {} has stopped'.format(self.key)) + return result diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index fe4300a2ef7..b98b91de7b2 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -2,8 +2,11 @@ '''API functions for deleting data from CKAN.''' +import logging + import sqlalchemy as sqla +import ckan.lib.jobs as jobs import ckan.logic import ckan.logic.action import ckan.plugins as plugins @@ -12,6 +15,9 @@ from ckan.common import _ + +log = logging.getLogger('ckan.logic') + validate = ckan.lib.navl.dictization_functions.validate # Define some shortcuts @@ -701,3 +707,29 @@ def unfollow_group(context, data_dict): ckan.logic.schema.default_follow_group_schema()) _unfollow(context, data_dict, schema, context['model'].UserFollowingGroup) + + +def job_clear(context, data_dict): + '''Clear all enqueued background jobs. + + Does not affect jobs that are already being processed. + ''' + _check_access('job_clear', context, data_dict) + jobs.queue.empty() + log.warn('Cleared background job queue') + + +def job_cancel(context, data_dict): + '''Cancel a queued background job. + + Removes the job from the queue. + + :param string id: The ID of the background job. 
+ ''' + _check_access(u'job_cancel', context, data_dict) + id = _get_or_bust(data_dict, u'id') + try: + jobs.from_id(id).cancel() + log.warn('Cancelled background job {}'.format(id)) + except KeyError: + raise NotFound diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 8d375e0f593..074ecfc6224 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -17,6 +17,7 @@ import ckan.logic.action import ckan.logic.schema import ckan.lib.dictization.model_dictize as model_dictize +import ckan.lib.jobs as jobs import ckan.lib.navl.dictization_functions import ckan.model as model import ckan.model.misc as misc @@ -3500,3 +3501,29 @@ def config_option_list(context, data_dict): schema = ckan.logic.schema.update_configuration_schema() return schema.keys() + + +def job_list(context, data_dict): + '''List enqueued background jobs. + + :returns: The currently enqueued background jobs. + :rtype: list + ''' + _check_access('job_list', context, data_dict) + return [jobs.dictize_job(job) for job in jobs.queue.jobs] + + +def job_show(context, data_dict): + '''Show details for a background job. + + :param string id: The ID of the background job. + + :returns: Details about the background job. + :rtype: dict + ''' + _check_access(u'job_show', context, data_dict) + id = _get_or_bust(data_dict, u'id') + try: + return jobs.dictize_job(jobs.from_id(id)) + except KeyError: + raise NotFound diff --git a/ckan/logic/auth/delete.py b/ckan/logic/auth/delete.py index ff1a52044bc..4efd48300ad 100644 --- a/ckan/logic/auth/delete.py +++ b/ckan/logic/auth/delete.py @@ -137,3 +137,13 @@ def organization_member_delete(context, data_dict): def member_delete(context, data_dict): return authz.is_authorized('member_create', context, data_dict) + + +def job_clear(context, data_dict): + '''Clear background jobs. Only sysadmins.''' + return {'success': False} + + +def job_cancel(context, data_dict): + '''Cancel a background job. 
Only sysadmins.''' + return {'success': False} diff --git a/ckan/logic/auth/get.py b/ckan/logic/auth/get.py index 011b19c2e84..7f47a35e4d4 100644 --- a/ckan/logic/auth/get.py +++ b/ckan/logic/auth/get.py @@ -343,3 +343,13 @@ def config_option_show(context, data_dict): def config_option_list(context, data_dict): '''List runtime-editable configuration options. Only sysadmins.''' return {'success': False} + + +def job_list(context, data_dict): + '''List background jobs. Only sysadmins.''' + return {'success': False} + + +def job_show(context, data_dict): + '''Show background job. Only sysadmins.''' + return {'success': False} diff --git a/ckan/websetup.py b/ckan/websetup.py index 48365a5f81f..0f1c84aadda 100644 --- a/ckan/websetup.py +++ b/ckan/websetup.py @@ -10,9 +10,11 @@ def setup_app(command, conf, vars): """Place any commands to setup ckan here""" load_environment(conf.global_conf, conf.local_conf) - + from ckan import model log.debug('Creating tables') model.repo.create_db() log.info('Creating tables: SUCCESS') + from ckan.lib.jobs import init_queue + init_queue() diff --git a/requirements.in b/requirements.in index b3ff4ca7fce..fed7a94fda3 100644 --- a/requirements.in +++ b/requirements.in @@ -22,6 +22,7 @@ repoze.who-friendlyform==1.0.8 repoze.who==2.3 requests==2.10.0 Routes==1.13 +rq==0.6.0 sqlalchemy-migrate==0.10.0 SQLAlchemy==0.9.6 sqlparse==0.1.19 diff --git a/requirements.txt b/requirements.txt index 7e12c29998a..9810f299ab4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,64 @@ # # This file is autogenerated by pip-compile -# Make changes in requirements.in, then run this to update: +# To update, run: # -# pip-compile requirements.in +# pip-compile --output-file requirements.txt requirements.in # argparse==1.4.0 # via ofs Babel==2.3.4 -Beaker==1.8.0 +Beaker==1.8.0 # via pylons bleach==1.4.3 +click==6.6 # via rq decorator==4.0.6 # via pylons, sqlalchemy-migrate fanstatic==0.12 Flask==0.10.1 -FormEncode==1.3.0 +FormEncode==1.3.0 # via 
pylons +funcsigs==1.0.2 # via beaker html5lib==0.9999999 # via bleach itsdangerous==0.24 # via flask -Jinja2==2.8 -Mako==1.0.4 +Jinja2==2.8 # via flask +Mako==1.0.4 # via pylons Markdown==2.6.6 -MarkupSafe==0.23 +MarkupSafe==0.23 # via jinja2, mako, webhelpers nose==1.3.7 # via pylons ofs==0.4.2 ordereddict==1.1 Pairtree==0.7.1-T passlib==1.6.5 paste==1.7.5.1 -PasteDeploy==1.5.2 -PasteScript==2.0.2 -pbr==0.11.1 # via sqlalchemy-migrate +PasteDeploy==1.5.2 # via pastescript, pylons +PasteScript==2.0.2 # via pylons +pbr==1.10.0 # via sqlalchemy-migrate psycopg2==2.4.5 -pysolr==3.5.0 -Pygments==2.1.3 +Pygments==2.1.3 # via weberror Pylons==0.9.7 +pysolr==3.5.0 python-dateutil==1.5 pytz==2016.4 pyutilib.component.core==4.6.4 +redis==2.10.5 # via rq repoze.lru==0.6 # via routes repoze.who-friendlyform==1.0.8 repoze.who==2.3 requests==2.10.0 -Routes==1.13 +Routes==1.13 # via pylons +rq==0.6.0 simplejson==3.8.2 # via pylons -six==1.10.0 # via bleach, html5lib, pastescript, sqlalchemy-migrate +six==1.10.0 # via bleach, html5lib, pastescript, pyutilib.component.core, sqlalchemy-migrate sqlalchemy-migrate==0.10.0 -SQLAlchemy==0.9.6 +SQLAlchemy==0.9.6 # via sqlalchemy-migrate sqlparse==0.1.19 -Tempita==0.5.2 +Tempita==0.5.2 # via pylons, sqlalchemy-migrate, weberror tzlocal==1.2.2 unicodecsv==0.14.1 vdm==0.13 -WebError==0.13.1 -WebHelpers==1.3 -WebOb==1.0.8 -WebTest==1.4.3 -Werkzeug==0.11.10 +WebError==0.13.1 # via pylons +WebHelpers==1.3 # via pylons +WebOb==1.0.8 # via fanstatic, pylons, repoze.who, repoze.who-friendlyform, weberror, webtest +WebTest==1.4.3 # via pylons +Werkzeug==0.11.10 # via flask zope.interface==4.2.0 # The following packages are commented out because they are # considered to be unsafe in a requirements file: -# pip # via pbr # setuptools # via repoze.who, zope.interface diff --git a/setup.py b/setup.py index a07f9aff729..e5223d9f700 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ 'front-end-build = ckan.lib.cli:FrontEndBuildCommand', 'views = 
ckan.lib.cli:ViewsCommand', 'config-tool = ckan.lib.cli:ConfigToolCommand', + 'jobs = ckan.lib.cli:JobsCommand', ], 'console_scripts': [ 'ckan-admin = bin.ckan_admin:Command', From 4f50fe38d6be080ed4de91ad91fc611ae904fabd Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Fri, 15 Jul 2016 10:55:06 +0200 Subject: [PATCH 29/65] [#2977] Lazy initialization of background job queue. Check Redis availability on server start but initialize queue only once it is actually required. --- ckan/config/environment.py | 6 +++ ckan/lib/cli.py | 7 +-- ckan/lib/jobs.py | 104 ++++++++++++++++++++++++++---------- ckan/logic/action/delete.py | 2 +- ckan/logic/action/get.py | 2 +- ckan/websetup.py | 4 +- 6 files changed, 88 insertions(+), 37 deletions(-) diff --git a/ckan/config/environment.py b/ckan/config/environment.py index 278e5fceb0b..15d1124f1af 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -16,6 +16,7 @@ import ckan.plugins as p import ckan.lib.helpers as helpers import ckan.lib.app_globals as app_globals +import ckan.lib.jobs as jobs import ckan.lib.render as render import ckan.lib.search as search import ckan.logic as logic @@ -93,6 +94,11 @@ def find_controller(self, controller): for msg in msgs: warnings.filterwarnings('ignore', msg, sqlalchemy.exc.SAWarning) + # Check Redis availability + if not jobs.is_available(): + log.critical('Could not connect to Redis. 
The background job queue ' + 'will not be available.') + # load all CKAN plugins p.load_all() diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 83d05989d36..b25eded4af1 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -2579,8 +2579,6 @@ class JobsCommand(CkanCommand): def command(self): self._load_config() - from ckan.lib.jobs import init_queue - self.queue = init_queue() if not self.args: print(self.__doc__) @@ -2601,9 +2599,8 @@ def command(self): error(u'Unknown command "{}"'.format(cmd)) def worker(self): - from ckan.lib.jobs import init_queue, Worker - worker = Worker(self.queue) - worker.work() + from ckan.lib.jobs import Worker + Worker().work() def list(self): jobs = p.toolkit.get_action('job_list')({}, {}) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index 451766b849b..185d166c712 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -11,7 +11,7 @@ import logging from pylons import config -from redis import Redis +from redis import ConnectionPool, Redis from rq import Queue, Worker as RqWorker from rq.connections import push_connection @@ -22,26 +22,63 @@ REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' -queue = None +# Redis connection pool. Do not use this directly, use ``connect_to_redis`` +# instead. +_connection_pool = None +# RQ job queue. Do not use this directly, use ``get_queue`` instead. +_queue = None -def init_queue(): + +def connect_to_redis(): + u''' + (Lazily) connect to Redis. + + The connection is set up but not actually established. The latter + happens automatically once the connection is used. + + :returns: A lazy Redis connection. + :rtype: ``redis.Redis`` + ''' + global _connection_pool + if _connection_pool is None: + url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) + log.debug(u'Using Redis at {}'.format(url)) + _connection_pool = ConnectionPool.from_url(url) + return Redis(connection_pool=_connection_pool) + + +def is_available(): u''' - Initialize the job queue. 
+ Check whether Redis is available. - :returns: The queue. + :returns: The availability of Redis. + :rtype: boolean + ''' + redis_conn = connect_to_redis() + try: + return redis_conn.ping() + except Exception: + log.exception(u'Redis is not available') + return False + + +def get_queue(): + u''' + Get the job queue. + + The job queue is initialized if that hasn't happened before. + + :returns: The job queue. :rtype: ``rq.queue.Queue`` ''' - global queue - if queue is not None: - return - redis_url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) - log.warn(u'Initializing background job queue at {}'.format(redis_url)) - redis_conn = Redis.from_url(redis_url) - redis_conn.ping() # Force connection check - queue = Queue(connection=redis_conn) - push_connection(redis_conn) # See https://github.com/nvie/rq/issues/479 - return queue + global _queue + if _queue is None: + log.debug(u'Initializing the background job queue') + redis_conn = connect_to_redis() + _queue = Queue(connection=redis_conn) + push_connection(redis_conn) # https://github.com/nvie/rq/issues/479 + return _queue def enqueue(fn, args=None, title=None): @@ -60,7 +97,7 @@ def enqueue(fn, args=None, title=None): ''' if args is None: args = [] - job = queue.enqueue_call(func=fn, args=args) + job = get_queue().enqueue_call(func=fn, args=args) job.meta[u'title'] = title job.save() if title: @@ -82,10 +119,10 @@ def from_id(id): :raises KeyError: if no job with that ID exists. ''' - for job in queue.jobs: + for job in get_queue().jobs: if job.id == id: return job - raise KeyError('No such job: "{}"'.format(id)) + raise KeyError(u'No such job: "{}"'.format(id)) def dictize_job(job): @@ -101,9 +138,9 @@ def dictize_job(job): :rtype: dict ''' return { - 'id': job.id, - 'title': job.meta.get('title'), - 'created': job.created_at.isoformat(), + u'id': job.id, + u'title': job.meta.get(u'title'), + u'created': job.created_at.isoformat(), } @@ -111,27 +148,40 @@ def test_job(*args): u'''Test job. 
A test job for debugging purposes. Prints out any arguments it - receives. + receives. Can be scheduled via ``paster jobs test``. ''' print(args) class Worker(RqWorker): u''' - Custom worker with CKAN-specific logging. + CKAN-specific worker. ''' + def __init__(self, queues=None, *args, **kwargs): + u''' + Constructor. + + Accepts the same arguments as the constructor of + ``rq.worker.Worker``. However, ``queues`` defaults to the CKAN + background job queue. + ''' + if queues is None: + queues = get_queue() + super(Worker, self).__init__(queues, *args, **kwargs) + def register_birth(self, *args, **kwargs): result = super(Worker, self).register_birth(*args, **kwargs) - log.info('Worker {} has started (PID: {})'.format(self.key, self.pid)) + log.info(u'Worker {} has started (PID: {})'.format(self.key, self.pid)) return result def execute_job(self, job, *args, **kwargs): - log.info('Worker {} starts to execute job {}'.format(self.key, job.id)) + log.info(u'Worker {} starts executing job {}'.format(self.key, job.id)) result = super(Worker, self).execute_job(job, *args, **kwargs) - log.info('Worker {} has finished executing job {}'.format(self.key, job.id)) + log.info(u'Worker {} has finished executing job {}'.format( + self.key, job.id)) return result def register_death(self, *args, **kwargs): result = super(Worker, self).register_death(*args, **kwargs) - log.info('Worker {} has stopped'.format(self.key)) + log.info(u'Worker {} has stopped'.format(self.key)) return result diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index b98b91de7b2..7314b365cdc 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -715,7 +715,7 @@ def job_clear(context, data_dict): Does not affect jobs that are already being processed. 
''' _check_access('job_clear', context, data_dict) - jobs.queue.empty() + jobs.get_queue().empty() log.warn('Cleared background job queue') diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 074ecfc6224..65244fc54aa 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3510,7 +3510,7 @@ def job_list(context, data_dict): :rtype: list ''' _check_access('job_list', context, data_dict) - return [jobs.dictize_job(job) for job in jobs.queue.jobs] + return [jobs.dictize_job(job) for job in jobs.get_queue().jobs] def job_show(context, data_dict): diff --git a/ckan/websetup.py b/ckan/websetup.py index 0f1c84aadda..48365a5f81f 100644 --- a/ckan/websetup.py +++ b/ckan/websetup.py @@ -10,11 +10,9 @@ def setup_app(command, conf, vars): """Place any commands to setup ckan here""" load_environment(conf.global_conf, conf.local_conf) - + from ckan import model log.debug('Creating tables') model.repo.create_db() log.info('Creating tables: SUCCESS') - from ckan.lib.jobs import init_queue - init_queue() From dd2edc3fcb6ef9ddf0357847bebaa0c2d4315376 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 18 Jul 2016 10:48:21 +0200 Subject: [PATCH 30/65] [#2977] Mark Celery features as deprecated. --- ckan/lib/celery_app.py | 8 ++++++++ ckan/lib/cli.py | 5 +++++ ckan/logic/action/get.py | 2 ++ 3 files changed, 15 insertions(+) diff --git a/ckan/lib/celery_app.py b/ckan/lib/celery_app.py index 9f66e85f33b..047acfa1f01 100644 --- a/ckan/lib/celery_app.py +++ b/ckan/lib/celery_app.py @@ -1,5 +1,11 @@ # encoding: utf-8 +''' +Celery background tasks management. + +This module is DEPRECATED, use ``ckan.lib.jobs`` instead. 
+''' + import ConfigParser import os import logging @@ -9,6 +15,8 @@ log = logging.getLogger(__name__) +log.warning('ckan.lib.celery_app is deprecated, use ckan.lib.jobs instead.') + LIST_PARAMS = """CELERY_IMPORTS ADMINS ROUTES""".split() from celery import Celery diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index b25eded4af1..97a59cf537b 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -946,6 +946,8 @@ def purge(self, dataset_ref): class Celery(CkanCommand): '''Celery daemon + This command is DEPRECATED, use `paster jobs` instead. + Usage: celeryd - run the celery daemon celeryd run concurrency - run the celery daemon with @@ -973,6 +975,7 @@ def command(self): error('Command %s not recognized' % cmd) def run_(self): + deprecation_warning(u'Use `paster jobs worker` instead.') default_ini = os.path.join(os.getcwd(), 'development.ini') if self.options.config: @@ -989,6 +992,7 @@ def run_(self): celery.worker_main(argv=['celeryd', '--loglevel=INFO'] + celery_args) def view(self): + deprecation_warning(u'Use `paster jobs list` instead.') self._load_config() import ckan.model as model from kombu.transport.sqlalchemy.models import Message @@ -1003,6 +1007,7 @@ def view(self): print '%i: Invisible Sent:%s' % (message.id, message.sent_at) def clean(self): + deprecation_warning(u'Use `paster jobs clear` instead.') self._load_config() import ckan.model as model query = model.Session.execute("select * from kombu_message") diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 65244fc54aa..200b71a69bd 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1114,6 +1114,8 @@ def resource_view_list(context, data_dict): def resource_status_show(context, data_dict): '''Return the statuses of a resource's tasks. + This function is DEPRECATED. 
+ :param id: the id of the resource :type id: string From e0afc7cbd423377960f852067984ddd97350870a Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 18 Jul 2016 11:15:23 +0200 Subject: [PATCH 31/65] [#2977] Log exceptions raised by background jobs. --- ckan/lib/jobs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index 185d166c712..c3ee2140dc4 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -185,3 +185,8 @@ def register_death(self, *args, **kwargs): result = super(Worker, self).register_death(*args, **kwargs) log.info(u'Worker {} has stopped'.format(self.key)) return result + + def handle_exception(self, job, *exc_info): + log.exception((u'Worker {} raised an exception while executing ' + u'job {}: {}').format(self.key, job.id, exc_info[1])) + return super(Worker, self).handle_exception(job, *exc_info) From 7ded14794cde0f34004d03b82d29f74ecb6cee16 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Tue, 19 Jul 2016 13:39:20 +0200 Subject: [PATCH 32/65] [#2977] Add Redis to installation instructions. --- doc/maintaining/installing/install-from-package.rst | 2 +- doc/maintaining/installing/install-from-source.rst | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/maintaining/installing/install-from-package.rst b/doc/maintaining/installing/install-from-package.rst index 37125251238..43ebfd43ba2 100644 --- a/doc/maintaining/installing/install-from-package.rst +++ b/doc/maintaining/installing/install-from-package.rst @@ -29,7 +29,7 @@ CKAN: #. Install the Ubuntu packages that CKAN requires (and 'git', to enable you to install CKAN extensions):: - sudo apt-get install -y nginx apache2 libapache2-mod-wsgi libpq5 git-core + sudo apt-get install -y nginx apache2 libapache2-mod-wsgi libpq5 redis-server git-core #. 
Download the CKAN package: diff --git a/doc/maintaining/installing/install-from-source.rst b/doc/maintaining/installing/install-from-source.rst index 4bcc247ca26..dcb94d7c7ba 100644 --- a/doc/maintaining/installing/install-from-source.rst +++ b/doc/maintaining/installing/install-from-source.rst @@ -23,7 +23,7 @@ work on CKAN. If you're using a Debian-based operating system (such as Ubuntu) install the required packages with this command:: - sudo apt-get install python-dev postgresql libpq-dev python-pip python-virtualenv git-core solr-jetty openjdk-6-jdk + sudo apt-get install python-dev postgresql libpq-dev python-pip python-virtualenv git-core solr-jetty openjdk-6-jdk redis-server If you're not using a Debian-based operating system, find the best way to install the following packages on your operating system (see @@ -42,6 +42,7 @@ Git `A distributed version control system `_ Jetty `An HTTP server `_ (used for Solr). OpenJDK 6 JDK `The Java Development Kit `_ +Redis `An in-memory data structure store `_ ===================== =============================================== From b264a8507c3c9469c2fbefd60a2e08b6fe0f406b Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Tue, 19 Jul 2016 15:59:45 +0200 Subject: [PATCH 33/65] [#2977] Add `redis_url` configuration option. --- ckan/config/deployment.ini_tmpl | 10 ++++++++++ ckan/config/environment.py | 1 + ckan/lib/jobs.py | 2 +- doc/maintaining/configuration.rst | 22 ++++++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 73acd9bb6d7..700a914dd94 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -79,6 +79,16 @@ ckan.site_id = default #solr_url = http://127.0.0.1:8983/solr +## Redis Settings + +# URL to your Redis instance, including the database to be used. 
Note +# that you cannot use the same Redis database for more than one CKAN +# instance (although you can use separate databases on the same Redis +# instance). + +#redis_url = redis://localhost:6379/0 + + ## CORS Settings # If cors.origin_allow_all is true, all origins are allowed. diff --git a/ckan/config/environment.py b/ckan/config/environment.py index 15d1124f1af..d56e7bd1dde 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -111,6 +111,7 @@ def find_controller(self, controller): 'sqlalchemy.url': 'CKAN_SQLALCHEMY_URL', 'ckan.datastore.write_url': 'CKAN_DATASTORE_WRITE_URL', 'ckan.datastore.read_url': 'CKAN_DATASTORE_READ_URL', + 'redis_url': 'CKAN_REDIS_URL', 'solr_url': 'CKAN_SOLR_URL', 'ckan.site_id': 'CKAN_SITE_ID', 'ckan.site_url': 'CKAN_SITE_URL', diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index c3ee2140dc4..f6ba0797f06 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -18,7 +18,7 @@ log = logging.getLogger(__name__) -REDIS_URL_SETTING_NAME = u'ckan.jobs.redis_url' +REDIS_URL_SETTING_NAME = u'redis_url' REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 52910fdb1c6..f5adb023af2 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -695,6 +695,28 @@ Default value: ``None`` List of the extra resource fields that would be used when searching. +Redis Settings +--------------- + +.. _redis_url: + +redis_url +^^^^^^^^^ + +Example:: + + redis_url = redis://localhost:7000/1 + +Default value: ``redis://localhost:6379/0`` + +URL to your Redis instance, including the database to be used. + +.. note:: + + If you're hosting multiple CKAN instances then you need to use a separate + Redis database for each of them. 
+ + CORS Settings ------------- From 541558a81b88cbeaf235d17113532d769efa3824 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Fri, 22 Jul 2016 12:02:30 +0200 Subject: [PATCH 34/65] [#2977] Add support for multiple queues. The background job system now supports multiple queues. By default, a single queue is used. In addition, RQ queue names are now prefixed with the CKAN site ID so that multiple CKAN instances can share the same Redis database. --- ckan/config/deployment.ini_tmpl | 6 +- ckan/lib/cli.py | 93 ++++++++++++++------- ckan/lib/jobs.py | 134 ++++++++++++++++++++++++------ ckan/logic/action/delete.py | 25 ++++-- ckan/logic/action/get.py | 17 +++- ckan/logic/schema.py | 12 +++ doc/maintaining/configuration.rst | 5 -- 7 files changed, 221 insertions(+), 71 deletions(-) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 700a914dd94..68c328e5b93 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -81,11 +81,7 @@ ckan.site_id = default ## Redis Settings -# URL to your Redis instance, including the database to be used. Note -# that you cannot use the same Redis database for more than one CKAN -# instance (although you can use separate databases on the same Redis -# instance). - +# URL to your Redis instance, including the database to be used. #redis_url = redis://localhost:6379/0 diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 97a59cf537b..e24156144d4 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -2567,15 +2567,43 @@ class JobsCommand(CkanCommand): Usage: - paster jobs worker - Start a worker that fetches jobs - from the queue and executes them - paster jobs list - List currently enqueued jobs - paster jobs cancel - Cancel a specific job - paster jobs clear - Cancel all jobs - paster jobs test - Enqueue a test job - - See also the `worker` command for running workers that fetch - and execute jobs. 
+ paster jobs worker [QUEUES] + + Start a worker that fetches jobs from queues and executes + them. If no queue names are given then the worker listens + to the default queue, this is equivalent to + + paster jobs worker default + + If queue names are given then the worker listens to those + queues and only those: + + paster jobs worker my-custom-queue + + Hence, if you want the worker to listen to the default queue + and some others then you must list the default queue explicitly: + + paster jobs worker default my-custom-queue + + paster jobs list [QUEUES] + + List currently enqueued jobs from the given queues. If no queue + names are given then the jobs from all queues are listed. + + paster jobs cancel ID + + Cancel a specific job. + + paster jobs clear [QUEUES] + + Cancel all jobs on the given queues. If no queue names are + given then ALL queues are cleared. + + paster jobs test [QUEUES] + + Enqueue a test job. If no queue names are given then the job is + added to the default queue. If queue names are given then a + separate test job is added to each of the queues. 
''' summary = __doc__.split(u'\n')[0] @@ -2584,13 +2612,12 @@ class JobsCommand(CkanCommand): def command(self): self._load_config() - - if not self.args: + try: + cmd = self.args.pop(0) + except IndexError: print(self.__doc__) sys.exit(0) - - cmd = self.args.pop(0) - if cmd == 'worker': + if cmd == u'worker': self.worker() elif cmd == u'list': self.list() @@ -2605,30 +2632,40 @@ def command(self): def worker(self): from ckan.lib.jobs import Worker - Worker().work() + Worker(self.args).work() def list(self): - jobs = p.toolkit.get_action('job_list')({}, {}) + data_dict = { + u'queues': self.args, + } + jobs = p.toolkit.get_action(u'job_list')({}, data_dict) for job in jobs: - if job['title'] is None: - job['title'] = '' - print('{created} {id} {title}'.format(**job)) + if job[u'title'] is None: + job[u'title'] = '' + else: + job[u'title'] = u'"{}"'.format(job[u'title']) + print(u'{created} {id} {queue} {title}'.format(**job)) def cancel(self): if not self.args: - error('You must specify a job ID') + error(u'You must specify a job ID') id = self.args[0] try: - p.toolkit.get_action('job_cancel')({}, {'id': id}) + p.toolkit.get_action(u'job_cancel')({}, {u'id': id}) except logic.NotFound: - error('There is no job with ID "{}"'.format(id)) - print('Cancelled job {}'.format(id)) + error(u'There is no job with ID "{}"'.format(id)) + print(u'Cancelled job {}'.format(id)) def clear(self): - p.toolkit.get_action('job_clear')({}, {}) - print('Cleared the job queue') + data_dict = { + u'queues': self.args, + } + queues = p.toolkit.get_action(u'job_clear')({}, data_dict) + queues = (u'"{}"'.format(q) for q in queues) + print(u'Cleared queue(s) {}'.format(u', '.join(queues))) def test(self): - from ckan.lib.jobs import enqueue, test_job - job = enqueue(test_job, ['A test job'], title='A test job') - print('Enqueued test job {}'.format(job.id)) + from ckan.lib.jobs import DEFAULT_QUEUE_NAME, enqueue, test_job + for queue in (self.args or [DEFAULT_QUEUE_NAME]): + job = 
enqueue(test_job, [u'A test job'], title=u'A test job', queue=queue) + print(u'Added test job {} to queue "{}"'.format(job.id, queue)) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index f6ba0797f06..cfb84138bc7 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -6,6 +6,14 @@ Note that most job management functions are not available from this module but via the various ``job_*`` API functions. + +Internally, RQ queue names are prefixed with a string containing the +CKAN site ID to avoid key collisions when the same Redis database is +used for multiple CKAN instances. The functions of this module expect +unprefixed queue names (e.g. ``'default'``) unless noted otherwise. The +raw RQ objects (e.g. a queue returned by ``get_queue``) use the full, +prefixed names. Use the functions ``add_queue_name_prefix`` and +``remove_queue_name_prefix`` to manage queue name prefixes. ''' import logging @@ -14,6 +22,7 @@ from redis import ConnectionPool, Redis from rq import Queue, Worker as RqWorker from rq.connections import push_connection +from rq.utils import ensure_list log = logging.getLogger(__name__) @@ -22,12 +31,43 @@ REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' +DEFAULT_QUEUE_NAME = u'default' + # Redis connection pool. Do not use this directly, use ``connect_to_redis`` # instead. _connection_pool = None -# RQ job queue. Do not use this directly, use ``get_queue`` instead. -_queue = None +# RQ job queues. Do not use this directly, use ``get_queue`` instead. +_queues = {} + + +def get_queue_name_prefix(): + u''' + Get the queue name prefix. + ''' + # This must be done at runtime since we need a loaded config + return u'ckan:{}:'.format(config[u'ckan.site_id']) + + +def add_queue_name_prefix(name): + u''' + Prefix a queue name. + + .. seealso:: :py:func:`remove_queue_name_prefix` + ''' + return get_queue_name_prefix() + name + + +def remove_queue_name_prefix(name): + u''' + Remove a queue name's prefix. + + .. 
seealso:: :py:func:`add_queue_name_prefix` + ''' + prefix = get_queue_name_prefix() + if name.startswith(prefix): + name = name[len(prefix):] + return name def connect_to_redis(): @@ -39,13 +79,17 @@ def connect_to_redis(): :returns: A lazy Redis connection. :rtype: ``redis.Redis`` + + .. seealso:: :py:func:`is_available` ''' global _connection_pool if _connection_pool is None: url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) log.debug(u'Using Redis at {}'.format(url)) _connection_pool = ConnectionPool.from_url(url) - return Redis(connection_pool=_connection_pool) + conn = Redis(connection_pool=_connection_pool) + push_connection(conn) # https://github.com/nvie/rq/issues/479 + return conn def is_available(): @@ -54,6 +98,8 @@ def is_available(): :returns: The availability of Redis. :rtype: boolean + + .. seealso:: :py:func:`connect_to_redis` ''' redis_conn = connect_to_redis() try: @@ -63,25 +109,47 @@ def is_available(): return False -def get_queue(): +def get_all_queues(): u''' - Get the job queue. + Return all job queues. + + :returns: A list of all queues. + :rtype: List of ``rq.queue.Queue`` instances + + .. seealso:: :py:func:`get_queue` + ''' + redis_conn = connect_to_redis() + prefix = get_queue_name_prefix() + return [q for q in Queue.all(connection=redis_conn) if + q.name.startswith(prefix)] + + +def get_queue(name=DEFAULT_QUEUE_NAME): + u''' + Get a job queue. The job queue is initialized if that hasn't happened before. + :param string name: The name of the queue. If not given then the + default queue is returned. + :returns: The job queue. :rtype: ``rq.queue.Queue`` + + .. 
seealso:: :py:func:`get_all_queues` ''' - global _queue - if _queue is None: - log.debug(u'Initializing the background job queue') + global _queues + fullname = add_queue_name_prefix(name) + try: + return _queues[fullname] + except KeyError: + log.debug(u'Initializing background job queue "{}"'.format(name)) redis_conn = connect_to_redis() - _queue = Queue(connection=redis_conn) - push_connection(redis_conn) # https://github.com/nvie/rq/issues/479 - return _queue + queue = _queues[fullname] = Queue(fullname, connection=redis_conn) + return queue -def enqueue(fn, args=None, title=None): +def enqueue(fn, args=None, title=None, queue=DEFAULT_QUEUE_NAME): u''' Enqueue a job to be run in the background. @@ -92,18 +160,21 @@ def enqueue(fn, args=None, title=None): :param string title: Optional human-readable title of the job. + :param string queue: Name of the queue. If not given then the + default queue is used. + :returns: The enqueued job. :rtype: ``rq.job.Job`` ''' if args is None: args = [] - job = get_queue().enqueue_call(func=fn, args=args) + job = get_queue(queue).enqueue_call(func=fn, args=args) job.meta[u'title'] = title job.save() + msg = u'Added background job {}'.format(job.id) if title: - msg = u'Enqueued background job "{}" ({})'.format(title, job.id) - else: - msg = u'Enqueued background job {}'.format(job.id) + msg = u'{} ("{}")'.format(msg, title) + msg = u'{} to queue "{}"'.format(msg, queue) log.info(msg) return job @@ -119,9 +190,10 @@ def from_id(id): :raises KeyError: if no job with that ID exists. 
''' - for job in get_queue().jobs: - if job.id == id: - return job + for queue in get_all_queues(): + for job in queue.jobs: + if job.id == id: + return job raise KeyError(u'No such job: "{}"'.format(id)) @@ -141,6 +213,7 @@ def dictize_job(job): u'id': job.id, u'title': job.meta.get(u'title'), u'created': job.created_at.isoformat(), + u'queue': remove_queue_name_prefix(job.origin), } @@ -162,20 +235,29 @@ def __init__(self, queues=None, *args, **kwargs): Constructor. Accepts the same arguments as the constructor of - ``rq.worker.Worker``. However, ``queues`` defaults to the CKAN - background job queue. + ``rq.worker.Worker``. However, the behavior of the ``queues`` + parameter is different. + + :param queues: The job queue(s) to listen on. Can be a string + with the name of a single queue or a list of queue names. + If not given then the default queue is used. ''' - if queues is None: - queues = get_queue() + queues = queues or [DEFAULT_QUEUE_NAME] + queues = [get_queue(q) for q in ensure_list(queues)] super(Worker, self).__init__(queues, *args, **kwargs) def register_birth(self, *args, **kwargs): result = super(Worker, self).register_birth(*args, **kwargs) - log.info(u'Worker {} has started (PID: {})'.format(self.key, self.pid)) + names = [remove_queue_name_prefix(n) for n in self.queue_names()] + names = u', '.join(u'"{}"'.format(n) for n in names) + log.info(u'Worker {} (PID {}) has started on queue(s) {} '.format( + self.key, self.pid, names)) return result def execute_job(self, job, *args, **kwargs): - log.info(u'Worker {} starts executing job {}'.format(self.key, job.id)) + queue = remove_queue_name_prefix(job.origin) + log.info(u'Worker {} starts executing job {} from queue "{}"'.format( + self.key, job.id, queue)) result = super(Worker, self).execute_job(job, *args, **kwargs) log.info(u'Worker {} has finished executing job {}'.format( self.key, job.id)) @@ -183,7 +265,7 @@ def execute_job(self, job, *args, **kwargs): def register_death(self, *args, 
**kwargs): result = super(Worker, self).register_death(*args, **kwargs) - log.info(u'Worker {} has stopped'.format(self.key)) + log.info(u'Worker {} (PID {}) has stopped'.format(self.key, self.pid)) return result def handle_exception(self, job, *exc_info): diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index 7314b365cdc..fe9061c5a98 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -709,14 +709,29 @@ def unfollow_group(context, data_dict): context['model'].UserFollowingGroup) +@ckan.logic.validate(ckan.logic.schema.job_clear_schema) def job_clear(context, data_dict): - '''Clear all enqueued background jobs. + '''Clear background job queues. Does not affect jobs that are already being processed. + + :param list queues: The queues to clear. If not given then ALL + queues are cleared. + + :returns: The cleared queues. + :rtype: list ''' - _check_access('job_clear', context, data_dict) - jobs.get_queue().empty() - log.warn('Cleared background job queue') + _check_access(u'job_clear', context, data_dict) + queues = data_dict.get(u'queues') + if queues: + queues = [jobs.get_queue(q) for q in queues] + else: + queues = jobs.get_all_queues() + names = [jobs.remove_queue_name_prefix(queue.name) for queue in queues] + for queue, name in zip(queues, names): + queue.empty() + log.warn(u'Cleared background job queue "{}"'.format(name)) + return names def job_cancel(context, data_dict): @@ -730,6 +745,6 @@ def job_cancel(context, data_dict): id = _get_or_bust(data_dict, u'id') try: jobs.from_id(id).cancel() - log.warn('Cancelled background job {}'.format(id)) + log.warn(u'Cancelled background job {}'.format(id)) except KeyError: raise NotFound diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 200b71a69bd..ce714e8a28c 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3505,14 +3505,27 @@ def config_option_list(context, data_dict): return schema.keys() 
+@logic.validate(logic.schema.job_list_schema) def job_list(context, data_dict): '''List enqueued background jobs. + :param list queues: Queues to list jobs from. If not given then the + jobs from all queues are listed. + :returns: The currently enqueued background jobs. :rtype: list ''' - _check_access('job_list', context, data_dict) - return [jobs.dictize_job(job) for job in jobs.get_queue().jobs] + _check_access(u'job_list', context, data_dict) + dictized_jobs = [] + queues = data_dict.get(u'queues') + if queues: + queues = [jobs.get_queue(q) for q in queues] + else: + queues = jobs.get_all_queues() + for queue in queues: + for job in queue.jobs: + dictized_jobs.append(jobs.dictize_job(job)) + return dictized_jobs def job_show(context, data_dict): diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py index 78ac4ed5a40..259c74cdc8c 100644 --- a/ckan/logic/schema.py +++ b/ckan/logic/schema.py @@ -701,3 +701,15 @@ def update_configuration_schema(): schema = plugin.update_config_schema(schema) return schema + + +def job_list_schema(): + return { + u'queues': [ignore_missing, list_of_strings], + } + + +def job_clear_schema(): + return { + u'queues': [ignore_missing, list_of_strings], + } diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index f5adb023af2..c07ada7c8cc 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -711,11 +711,6 @@ Default value: ``redis://localhost:6379/0`` URL to your Redis instance, including the database to be used. -.. note:: - - If you're hosting multiple CKAN instances then you need to use a separate - Redis database for each of them. - CORS Settings ------------- From b3be3208d0803cff0730c29e8e7cbd533c9c247f Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 25 Jul 2016 08:25:03 +0200 Subject: [PATCH 35/65] [#2977] Document requirement to namespace Redis keys. Documents that Redis keys should be prefixed with the site ID and the extension name. 
--- doc/extensions/best-practices.rst | 56 +++++++++++++++++++---- doc/extensions/custom-config-settings.rst | 2 +- doc/theming/best-practices.rst | 38 ++------------- doc/theming/javascript.rst | 3 +- doc/theming/templates.rst | 6 +-- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index 422dc50bec2..5fe67f5ee94 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -37,17 +37,57 @@ Implement each plugin class in a separate Python module This keeps CKAN's plugin loading order simple, see :ref:`ckan.plugins`. -.. _extension config setting names best practice: +.. _avoid name clashes: ------------------------------------------------------------------ -Names of config settings should include the name of the extension ------------------------------------------------------------------ +------------------ +Avoid name clashes +------------------ +Many of the names you pick for your identifiers and files must be unique in +relation to the names used by core CKAN and other extensions. To avoid +conflicts you should prefix any public name that your extension introduces with +the name of your extension. For example: -Names of config settings provided by extensions should include the name -of the extension, to avoid conflicting with core config settings or with -config settings from other extensions. For example:: +* The names of *configuration settings* introduced by your extension should + have the form ``ckan.my_extension.my_config_setting``. - ckan.my_extension.show_most_popular_groups = True +* The names of *templates and template snippets* introduced by your extension + should begin with the name of your extension:: + + snippets/my_extension_useful_snippet.html + + If you have add a lot of templates you can also put them into a separate + folder named after your extension instead. 
+ +* The names of *template helper functions* introduced by your extension should + begin with the name of your extension. For example: + + .. literalinclude:: /../ckanext/example_theme/v08_custom_helper_function/plugin.py + :pyobject: ExampleThemePlugin.get_helpers + +* The names of *JavaScript modules* introduced by your extension should begin + with the name of your extension. For example + ``fanstatic/example_theme_popover.js``: + + .. literalinclude:: /../ckanext/example_theme/v16_initialize_a_javascript_module/fanstatic/example_theme_popover.js + +* The names of *API action functions* introduced by your extension should begin + with the name of your extension. For example + ``my_extension_foobarize_everything``. + +In some situations, resources like databases may even be shared between +multiple CKAN *instances*, which requires an even higher degree of uniqueness +for the corresponding names. In that case, you should also prefix your +identifiers with the CKAN site ID, which is available via + +:: + + from pylons import config + site_id = config[u'ckan.site_id'] + +Currently this only affects the :ref:`Redis database `: + +* All *keys in the Redis database* created by your extension should be prefixed + with both the CKAN site ID and your extension's name. ------------------------------------- diff --git a/doc/extensions/custom-config-settings.rst b/doc/extensions/custom-config-settings.rst index 81c350fdc1c..f9d0ef0589c 100644 --- a/doc/extensions/custom-config-settings.rst +++ b/doc/extensions/custom-config-settings.rst @@ -32,7 +32,7 @@ will be allowed to create groups. Names of config settings provided by extensions should include the name of the extension, to avoid conflicting with core config settings or with config settings from other extensions. - See :ref:`extension config setting names best practice`. + See :ref:`avoid name clashes`. .. 
note:: diff --git a/doc/theming/best-practices.rst b/doc/theming/best-practices.rst index 91b9bc5f44e..9126b165d29 100644 --- a/doc/theming/best-practices.rst +++ b/doc/theming/best-practices.rst @@ -50,42 +50,12 @@ All user-visible strings should be internationalized, see :doc:`/contributing/string-i18n`. ------------------------------------------------------------------ -Helper function names should begin with the name of the extension ------------------------------------------------------------------ +------------------ +Avoid name clashes +------------------ -Namespacing helper functions in this way avoids accidentally overriding, or -being overriden by, a core helper function, or a helper function from another -extension. For example: +See :ref:`avoid name clashes`. -.. literalinclude:: /../ckanext/example_theme/v08_custom_helper_function/plugin.py - :pyobject: ExampleThemePlugin.get_helpers - - -.. _snippet filenames best practice: - -------------------------------------------------------------- -Snippet filenames should begin with the name of the extension -------------------------------------------------------------- - -Namespacing snippets in this way avoids accidentally overriding, or being -overridden by, a core snippet, or a snippet from another extension. -For example:: - - snippets/example_theme_most_popular_groups.html - - -.. _javascript module names best practice: - ----------------------------------------------------------------------- -|javascript| modules names should begin with the name of the extension ----------------------------------------------------------------------- - -Namespacing |javascript| modules in this way avoids accidentally overriding, or -being overridden by, a core module, or a module from another extension. For -example: ``fanstatic/example_theme_popover.js``: - -.. literalinclude:: /../ckanext/example_theme/v16_initialize_a_javascript_module/fanstatic/example_theme_popover.js .. 
_javascript module docstrings best practice: diff --git a/doc/theming/javascript.rst b/doc/theming/javascript.rst index a1feaef4c0f..15ed997ea19 100644 --- a/doc/theming/javascript.rst +++ b/doc/theming/javascript.rst @@ -88,8 +88,7 @@ To get CKAN to call some custom JavaScript code, we need to: .. note:: |javascript| module names should begin with the name of the extension, - to avoid conflicting with other modules. - See :ref:`javascript module names best practice`. + to avoid conflicting with other modules. See :ref:`avoid name clashes`. .. note:: diff --git a/doc/theming/templates.rst b/doc/theming/templates.rst index 91ef55e1ab7..479fc1124c0 100644 --- a/doc/theming/templates.rst +++ b/doc/theming/templates.rst @@ -657,8 +657,7 @@ should see the most popular groups rendered differently: To avoid unintended conflicts, we recommend that snippet filenames begin with the name of the extension they belong to, e.g. - ``snippets/example_theme_*.html``. - See :ref:`snippet filenames best practice`. + ``snippets/example_theme_*.html``. See :ref:`avoid name clashes`. .. note:: @@ -754,8 +753,7 @@ the most popular groups on the front page. First, add a new helper function to Names of config settings provided by extensions should include the name of the extension, to avoid conflicting with core config settings or with - config settings from other extensions. - See :ref:`extension config setting names best practice`. + config settings from other extensions. See :ref:`avoid name clashes`. Now we can call this helper function from our ``layout1.html`` template: From e9e7e3c7a74500afda9da58ec8ed1e0cb3632aeb Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 25 Jul 2016 11:43:36 +0200 Subject: [PATCH 36/65] [#2977] Add tests for `ckan.lib.jobs`. 
--- ckan/config/environment.py | 7 +- ckan/lib/jobs.py | 114 +++++++---------- ckan/lib/redis.py | 61 +++++++++ ckan/tests/helpers.py | 94 ++++++++++++-- ckan/tests/lib/test_jobs.py | 244 ++++++++++++++++++++++++++++++++++++ doc/contributing/test.rst | 2 + test-core.ini | 3 + 7 files changed, 441 insertions(+), 84 deletions(-) create mode 100644 ckan/lib/redis.py create mode 100644 ckan/tests/lib/test_jobs.py diff --git a/ckan/config/environment.py b/ckan/config/environment.py index d56e7bd1dde..cc19d1e9bd7 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -16,7 +16,7 @@ import ckan.plugins as p import ckan.lib.helpers as helpers import ckan.lib.app_globals as app_globals -import ckan.lib.jobs as jobs +from ckan.lib.redis import is_redis_available import ckan.lib.render as render import ckan.lib.search as search import ckan.logic as logic @@ -95,9 +95,8 @@ def find_controller(self, controller): warnings.filterwarnings('ignore', msg, sqlalchemy.exc.SAWarning) # Check Redis availability - if not jobs.is_available(): - log.critical('Could not connect to Redis. The background job queue ' - 'will not be available.') + if not is_redis_available(): + log.critical('Could not connect to Redis.') # load all CKAN plugins p.load_all() diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index cfb84138bc7..72e1941e10d 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -19,29 +19,35 @@ import logging from pylons import config -from redis import ConnectionPool, Redis from rq import Queue, Worker as RqWorker from rq.connections import push_connection +from rq.exceptions import NoSuchJobError +from rq.job import Job from rq.utils import ensure_list +from ckan.lib.redis import connect_to_redis -log = logging.getLogger(__name__) - -REDIS_URL_SETTING_NAME = u'redis_url' -REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' +log = logging.getLogger(__name__) DEFAULT_QUEUE_NAME = u'default' -# Redis connection pool. 
Do not use this directly, use ``connect_to_redis`` -# instead. -_connection_pool = None - # RQ job queues. Do not use this directly, use ``get_queue`` instead. _queues = {} -def get_queue_name_prefix(): +def _connect(): + u''' + Connect to Redis and tell RQ about it. + + Workaround for https://github.com/nvie/rq/issues/479. + ''' + conn = connect_to_redis() + push_connection(conn) + return conn + + +def _get_queue_name_prefix(): u''' Get the queue name prefix. ''' @@ -55,71 +61,34 @@ def add_queue_name_prefix(name): .. seealso:: :py:func:`remove_queue_name_prefix` ''' - return get_queue_name_prefix() + name + return _get_queue_name_prefix() + name def remove_queue_name_prefix(name): u''' Remove a queue name's prefix. - .. seealso:: :py:func:`add_queue_name_prefix` - ''' - prefix = get_queue_name_prefix() - if name.startswith(prefix): - name = name[len(prefix):] - return name - - -def connect_to_redis(): - u''' - (Lazily) connect to Redis. + :raises ValueError: if the given name is not prefixed. - The connection is set up but not actually established. The latter - happens automatically once the connection is used. - - :returns: A lazy Redis connection. - :rtype: ``redis.Redis`` - - .. seealso:: :py:func:`is_available` - ''' - global _connection_pool - if _connection_pool is None: - url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) - log.debug(u'Using Redis at {}'.format(url)) - _connection_pool = ConnectionPool.from_url(url) - conn = Redis(connection_pool=_connection_pool) - push_connection(conn) # https://github.com/nvie/rq/issues/479 - return conn - - -def is_available(): - u''' - Check whether Redis is available. - - :returns: The availability of Redis. - :rtype: boolean - - .. seealso:: :py:func:`connect_to_redis` + .. 
seealso:: :py:func:`add_queue_name_prefix` ''' - redis_conn = connect_to_redis() - try: - return redis_conn.ping() - except Exception: - log.exception(u'Redis is not available') - return False + prefix = _get_queue_name_prefix() + if not name.startswith(prefix): + raise ValueError(u'Queue name "{}" is not prefixed.'.format(name)) + return name[len(prefix):] def get_all_queues(): u''' - Return all job queues. + Return all (non-empty) job queues. :returns: A list of all queues. :rtype: List of ``rq.queue.Queue`` instances .. seealso:: :py:func:`get_queue` ''' - redis_conn = connect_to_redis() - prefix = get_queue_name_prefix() + redis_conn = _connect() + prefix = _get_queue_name_prefix() return [q for q in Queue.all(connection=redis_conn) if q.name.startswith(prefix)] @@ -144,12 +113,12 @@ def get_queue(name=DEFAULT_QUEUE_NAME): return _queues[fullname] except KeyError: log.debug(u'Initializing background job queue "{}"'.format(name)) - redis_conn = connect_to_redis() + redis_conn = _connect() queue = _queues[fullname] = Queue(fullname, connection=redis_conn) return queue -def enqueue(fn, args=None, title=None, queue=DEFAULT_QUEUE_NAME): +def enqueue(fn, args=None, kwargs=None, title=None, queue=DEFAULT_QUEUE_NAME): u''' Enqueue a job to be run in the background. @@ -158,6 +127,10 @@ def enqueue(fn, args=None, title=None, queue=DEFAULT_QUEUE_NAME): :param list args: List of arguments to be passed to the function. Pass an empty list if there are no arguments (default). + :param dict kwargs: Dict of keyword arguments to be passed to the + function. Pass an empty dict if there are no keyword arguments + (default). + :param string title: Optional human-readable title of the job. :param string queue: Name of the queue. 
If not given then the @@ -168,7 +141,9 @@ def enqueue(fn, args=None, title=None, queue=DEFAULT_QUEUE_NAME): ''' if args is None: args = [] - job = get_queue(queue).enqueue_call(func=fn, args=args) + if kwargs is None: + kwargs = {} + job = get_queue(queue).enqueue_call(func=fn, args=args, kwargs=kwargs) job.meta[u'title'] = title job.save() msg = u'Added background job {}'.format(job.id) @@ -190,11 +165,10 @@ def from_id(id): :raises KeyError: if no job with that ID exists. ''' - for queue in get_all_queues(): - for job in queue.jobs: - if job.id == id: - return job - raise KeyError(u'No such job: "{}"'.format(id)) + try: + return Job.fetch(id, connection=_connect()) + except NoSuchJobError: + raise KeyError(u'There is no job with ID "{}".'.format(id)) def dictize_job(job): @@ -256,11 +230,11 @@ def register_birth(self, *args, **kwargs): def execute_job(self, job, *args, **kwargs): queue = remove_queue_name_prefix(job.origin) - log.info(u'Worker {} starts executing job {} from queue "{}"'.format( + log.info(u'Worker {} has started job {} from queue "{}"'.format( self.key, job.id, queue)) result = super(Worker, self).execute_job(job, *args, **kwargs) - log.info(u'Worker {} has finished executing job {}'.format( - self.key, job.id)) + log.info(u'Worker {} has finished job {} from queue "{}"'.format( + self.key, job.id, queue)) return result def register_death(self, *args, **kwargs): @@ -269,6 +243,6 @@ def register_death(self, *args, **kwargs): return result def handle_exception(self, job, *exc_info): - log.exception((u'Worker {} raised an exception while executing ' - u'job {}: {}').format(self.key, job.id, exc_info[1])) + log.exception(u'Job {} on worker {} raised an exception: {}'.format( + job.id, self.key, exc_info[1])) return super(Worker, self).handle_exception(job, *exc_info) diff --git a/ckan/lib/redis.py b/ckan/lib/redis.py new file mode 100644 index 00000000000..3666ead9f8c --- /dev/null +++ b/ckan/lib/redis.py @@ -0,0 +1,61 @@ +# encoding: utf-8 + +u''' 
+Redis utilities. +''' + +from __future__ import absolute_import + +import datetime +import logging + +from pylons import config +from redis import ConnectionPool, Redis + + +log = logging.getLogger(__name__) + +REDIS_URL_SETTING_NAME = u'redis_url' + +REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' + +# Redis connection pool. Do not use this directly, use ``connect_to_redis`` +# instead. +_connection_pool = None + + +def connect_to_redis(): + u''' + (Lazily) connect to Redis. + + The connection is set up but not actually established. The latter + happens automatically once the connection is used. + + :returns: A lazy Redis connection. + :rtype: ``redis.Redis`` + + .. seealso:: :py:func:`is_redis_available` + ''' + global _connection_pool + if _connection_pool is None: + url = config.get(REDIS_URL_SETTING_NAME, REDIS_URL_DEFAULT_VALUE) + log.debug(u'Using Redis at {}'.format(url)) + _connection_pool = ConnectionPool.from_url(url) + return Redis(connection_pool=_connection_pool) + + +def is_redis_available(): + u''' + Check whether Redis is available. + + :returns: The availability of Redis. + :rtype: boolean + + .. seealso:: :py:func:`connect_to_redis` + ''' + redis_conn = connect_to_redis() + try: + return redis_conn.ping() + except Exception: + log.exception(u'Redis is not available') + return False diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index bfda83a1a1e..51edb2d63f2 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -19,6 +19,13 @@ This module is reserved for these very useful functions. ''' + +import collections +import contextlib +import functools +import logging +import re + import webtest import nose.tools from nose.tools import assert_in, assert_not_in @@ -316,21 +323,41 @@ def test_ckan_site_title(self): :param value: the new config key's value, e.g. ``'My Test CKAN'`` :type value: string + + .. 
seealso:: :py:func:`changed_config` ''' def decorator(func): + @functools.wraps(func) def wrapper(*args, **kwargs): - _original_config = config.copy() - config[key] = value + with changed_config(key, value): + return func(*args, **kwargs) + return wrapper + return decorator - try: - return_value = func(*args, **kwargs) - finally: - config.clear() - config.update(_original_config) - return return_value - return nose.tools.make_decorator(func)(wrapper) - return decorator +@contextlib.contextmanager +def changed_config(key, value): + ''' + Context manager for temporarily changing a config value. + + Allows you to temporarily change the value of a CKAN configuration + option. The original value is restored once the context manager is + left. + + Usage:: + + with changed_config(u'ckan.site_title', u'My Test CKAN'): + assert config[u'ckan.site_title'] == u'My Test CKAN' + + .. seealso:: :py:func:`change_config` + ''' + _original_config = config.copy() + config[key] = value + try: + yield + finally: + config.clear() + config.update(_original_config) def mock_auth(auth_function_path): @@ -468,3 +495,50 @@ def wrapper(*args, **kwargs): return return_value return nose.tools.make_decorator(func)(wrapper) return decorator + + +class CapturingLogHandler(logging.Handler): + u''' + Log handler to check for expected logs. + + Automatically attaches itself to the root logger or to ``logger`` if + given. ``logger`` can be a string with the logger's name or an + actual ``logging.Logger`` instance. 
+ ''' + def __init__(self, logger=None, *args, **kwargs): + super(CapturingLogHandler, self).__init__(*args, **kwargs) + self.clear() + if logger is None: + logger = logging.getLogger() + elif not isinstance(logger, logging.Logger): + logger = logging.getLogger(logger) + logger.addHandler(self) + + def emit(self, record): + self.messages[record.levelname.lower()].append(record.getMessage()) + + def assert_log(self, level, pattern, msg=None): + u''' + Assert that a certain message has been logged. + + :param string pattern: A regex which the message has to match. + The match is done using ``re.search``. + + :param string level: The message level (``'debug'``, ...). + + :param string msg: Optional failure message in case the expected + log message was not logged. + + :raises AssertionError: If the expected message was not logged. + ''' + pattern = re.compile(pattern) + for log_msg in self.messages[level]: + if pattern.search(log_msg): + return + raise AssertionError(msg) + + def clear(self): + u''' + Clear all captured log messages. + ''' + self.messages = collections.defaultdict(list) diff --git a/ckan/tests/lib/test_jobs.py b/ckan/tests/lib/test_jobs.py new file mode 100644 index 00000000000..8a194b8f7c1 --- /dev/null +++ b/ckan/tests/lib/test_jobs.py @@ -0,0 +1,244 @@ +# encoding: utf-8 + +u''' +Tests for ``ckan.lib.jobs``. +''' + +import datetime + +from nose.tools import ok_, assert_equal, raises +from pylons import config +import rq + +import ckan.lib.jobs as jobs +from ckan.lib.redis import connect_to_redis +from ckan.tests.helpers import CapturingLogHandler, changed_config + + +def _delete_rq_queue(queue): + u''' + Delete an RQ queue. + + The queue is emptied before it is deleted. + + :param rq.queue.Queue queue: The RQ queue. 
+ ''' + # See https://github.com/nvie/rq/issues/731 + queue.empty() + redis_conn = connect_to_redis() + redis_conn.srem(rq.Queue.redis_queues_keys, queue._key) + redis_conn.delete(queue._key) + + +class RQTests(object): + + def setUp(self): + # Delete all RQ queues + redis_conn = connect_to_redis() + for queue in rq.Queue.all(connection=redis_conn): + _delete_rq_queue(queue) + + def all_jobs(self): + u''' + Get a list of all RQ jobs. + ''' + jobs = [] + redis_conn = connect_to_redis() + for queue in rq.Queue.all(connection=redis_conn): + jobs.extend(queue.jobs) + return jobs + + def enqueue(self, job=None, *args, **kwargs): + u''' + Enqueue a test job. + ''' + if job is None: + job = jobs.test_job + return jobs.enqueue(job, *args, **kwargs) + + +class TestQueueNamePrefixes(RQTests): + + def test_queue_name_prefix_contains_site_id(self): + prefix = jobs.add_queue_name_prefix(u'') + ok_(config[u'ckan.site_id'] in prefix) + + def test_queue_name_removal_with_prefix(self): + plain = u'foobar' + prefixed = jobs.add_queue_name_prefix(plain) + assert_equal(jobs.remove_queue_name_prefix(prefixed), plain) + + @raises(ValueError) + def test_queue_name_removal_without_prefix(self): + jobs.remove_queue_name_prefix(u'foobar') + + +class TestEnqueue(RQTests): + + def test_enqueue_return_value(self): + job = self.enqueue() + ok_(isinstance(job, rq.job.Job)) + + def test_enqueue_args(self): + self.enqueue() + self.enqueue(args=[1, 2]) + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 2) + assert_equal(len(all_jobs[0].args), 0) + assert_equal(all_jobs[1].args, [1, 2]) + + def test_enqueue_kwargs(self): + self.enqueue() + self.enqueue(kwargs={u'foo': 1}) + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 2) + assert_equal(len(all_jobs[0].kwargs), 0) + assert_equal(all_jobs[1].kwargs, {u'foo': 1}) + + def test_enqueue_title(self): + self.enqueue() + self.enqueue(title=u'Title') + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 2) + 
assert_equal(all_jobs[0].meta[u'title'], None) + assert_equal(all_jobs[1].meta[u'title'], u'Title') + + def test_enqueue_queue(self): + self.enqueue() + self.enqueue(queue=u'my_queue') + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 2) + assert_equal(all_jobs[0].origin, + jobs.add_queue_name_prefix(jobs.DEFAULT_QUEUE_NAME)) + assert_equal(all_jobs[1].origin, + jobs.add_queue_name_prefix(u'my_queue')) + + +class TestGetAllQueues(RQTests): + + def test_foreign_queues_are_ignored(self): + u''' + Test that foreign RQ-queues are ignored. + ''' + # Create queues for this CKAN instance + self.enqueue(queue=u'q1') + self.enqueue(queue=u'q2') + # Create queue for another CKAN instance + with changed_config(u'ckan.site_id', u'some-other-ckan-instance'): + self.enqueue(queue=u'q2') + # Create queue not related to CKAN + rq.Queue('q4').enqueue_call(jobs.test_job) + all_queues = jobs.get_all_queues() + names = {jobs.remove_queue_name_prefix(q.name) for q in all_queues} + assert_equal(names, {u'q1', u'q2'}) + + +class TestGetQueue(RQTests): + + def test_get_queue_default_queue(self): + u''' + Test that the default queue is returned if no queue is given. + ''' + q = jobs.get_queue() + assert_equal(jobs.remove_queue_name_prefix(q.name), + jobs.DEFAULT_QUEUE_NAME) + + def test_get_queue_other_queue(self): + u''' + Test that a different queue can be given. 
+ ''' + q = jobs.get_queue(u'my_queue') + assert_equal(jobs.remove_queue_name_prefix(q.name), u'my_queue') + + +class TestFromID(RQTests): + + def test_from_id_existing(self): + job = self.enqueue() + assert_equal(jobs.from_id(job.id), job) + job = self.enqueue(queue=u'my_queue') + assert_equal(jobs.from_id(job.id), job) + + @raises(KeyError) + def test_from_id_not_existing(self): + jobs.from_id(u'does-not-exist') + + +class TestDictizeJob(RQTests): + + def test_dictize_job(self): + job = self.enqueue(title=u'Title', queue=u'my_queue') + d = jobs.dictize_job(job) + assert_equal(d[u'id'], job.id) + assert_equal(d[u'title'], u'Title') + assert_equal(d[u'queue'], u'my_queue') + dt = datetime.datetime.strptime(d[u'created'], u'%Y-%m-%dT%H:%M:%S.%f') + now = datetime.datetime.utcnow() + ok_(abs((now - dt).total_seconds()) < 10) + + +def failing_job(): + u''' + A background job that fails. + ''' + raise RuntimeError(u'JOB FAILURE') + + +class TestWorker(RQTests): + + def test_worker_logging_lifecycle(self): + u''' + Test that a logger's lifecycle is logged. + ''' + queue = u'my_queue' + job = self.enqueue(queue=queue) + logs = CapturingLogHandler(u'ckan.lib.jobs') + worker = jobs.Worker([queue]) + worker.work(burst=True) + messages = logs.messages[u'info'] + # We expect 4 log messages: Worker start, job start, job end, + # worker end. + assert_equal(len(messages), 4) + ok_(worker.key in messages[0]) + ok_(queue in messages[0]) + ok_(worker.key in messages[1]) + ok_(job.id in messages[1]) + ok_(worker.key in messages[2]) + ok_(job.id in messages[2]) + ok_(worker.key in messages[3]) + + def test_worker_exception_logging(self): + u''' + Test that exceptions in a job are logged. 
+ ''' + job = self.enqueue(failing_job) + logs = CapturingLogHandler(u'ckan.lib.jobs') + worker = jobs.Worker() + + # Prevent worker from forking so that we can capture log + # messages from within the job + def execute_job(*args, **kwargs): + return worker.perform_job(*args, **kwargs) + worker.execute_job = execute_job + + worker.work(burst=True) + logs.assert_log(u'error', u'JOB FAILURE') + + def test_worker_default_queue(self): + self.enqueue() + self.enqueue(queue=u'my_queue') + jobs.Worker().work(burst=True) + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 1) + assert_equal(jobs.remove_queue_name_prefix(all_jobs[0].origin), + u'my_queue') + + def test_worker_multiple_queues(self): + self.enqueue() + self.enqueue(queue=u'queue1') + self.enqueue(queue=u'queue2') + jobs.Worker([u'queue1', u'queue2']).work(burst=True) + all_jobs = self.all_jobs() + assert_equal(len(all_jobs), 1) + assert_equal(jobs.remove_queue_name_prefix(all_jobs[0].origin), + jobs.DEFAULT_QUEUE_NAME) diff --git a/doc/contributing/test.rst b/doc/contributing/test.rst index 0258c2be0ef..d060936dd64 100644 --- a/doc/contributing/test.rst +++ b/doc/contributing/test.rst @@ -60,6 +60,8 @@ Create test databases: This database connection is specified in the ``test-core.ini`` file by the ``sqlalchemy.url`` parameter. +You should also make sure that the :ref:`Redis database ` configured +in ``test-core.ini`` is different from your production database. ~~~~~~~~~~~~~ Run the tests diff --git a/test-core.ini b/test-core.ini index 5687a2f0867..8b63bf9e530 100644 --- a/test-core.ini +++ b/test-core.ini @@ -31,6 +31,9 @@ ckan.datapusher.formats = csv xls xlsx tsv application/csv application/vnd.ms-ex ## Solr support solr_url = http://127.0.0.1:8983/solr/ckan +# Redis URL. Use a separate Redis database for testing. 
+redis_url = redis://localhost:6379/1 + ckan.auth.user_create_organizations = true ckan.auth.user_create_groups = true ckan.auth.create_user_via_api = false From df7e61ff9fad17c9b4aecf146a20d4da3a69acdc Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Tue, 26 Jul 2016 11:26:06 +0200 Subject: [PATCH 37/65] [#2977] Tests for `job_*` API functions. --- ckan/lib/jobs.py | 8 +-- ckan/logic/action/delete.py | 8 +-- ckan/logic/action/get.py | 2 +- ckan/tests/helpers.py | 50 ++++++++++++++++- ckan/tests/lib/test_jobs.py | 75 ++++++-------------------- ckan/tests/logic/action/test_delete.py | 61 +++++++++++++++++++++ ckan/tests/logic/action/test_get.py | 55 +++++++++++++++++++ 7 files changed, 189 insertions(+), 70 deletions(-) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index 72e1941e10d..ef1c1e36f7d 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -80,9 +80,9 @@ def remove_queue_name_prefix(name): def get_all_queues(): u''' - Return all (non-empty) job queues. + Return all job queues currently in use. - :returns: A list of all queues. + :returns: The queues. :rtype: List of ``rq.queue.Queue`` instances .. seealso:: :py:func:`get_queue` @@ -154,7 +154,7 @@ def enqueue(fn, args=None, kwargs=None, title=None, queue=DEFAULT_QUEUE_NAME): return job -def from_id(id): +def job_from_id(id): u''' Look up an enqueued job by its ID. 
@@ -186,7 +186,7 @@ def dictize_job(job): return { u'id': job.id, u'title': job.meta.get(u'title'), - u'created': job.created_at.isoformat(), + u'created': job.created_at.strftime(u'%Y-%m-%dT%H:%M:%S'), u'queue': remove_queue_name_prefix(job.origin), } diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index fe9061c5a98..b9294dc1556 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -730,21 +730,21 @@ def job_clear(context, data_dict): names = [jobs.remove_queue_name_prefix(queue.name) for queue in queues] for queue, name in zip(queues, names): queue.empty() - log.warn(u'Cleared background job queue "{}"'.format(name)) + log.info(u'Cleared background job queue "{}"'.format(name)) return names def job_cancel(context, data_dict): '''Cancel a queued background job. - Removes the job from the queue. + Removes the job from the queue and deletes it. :param string id: The ID of the background job. ''' _check_access(u'job_cancel', context, data_dict) id = _get_or_bust(data_dict, u'id') try: - jobs.from_id(id).cancel() - log.warn(u'Cancelled background job {}'.format(id)) + jobs.job_from_id(id).delete() + log.info(u'Cancelled background job {}'.format(id)) except KeyError: raise NotFound diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index ce714e8a28c..c9fdb4c0e08 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3539,6 +3539,6 @@ def job_show(context, data_dict): _check_access(u'job_show', context, data_dict) id = _get_or_bust(data_dict, u'id') try: - return jobs.dictize_job(jobs.from_id(id)) + return jobs.dictize_job(jobs.job_from_id(id)) except KeyError: raise NotFound diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index 51edb2d63f2..ff60fd21eaa 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -30,8 +30,11 @@ import nose.tools from nose.tools import assert_in, assert_not_in import mock +import rq from ckan.common import config +import ckan.lib.jobs as jobs 
+from ckan.lib.redis import connect_to_redis import ckan.lib.search as search import ckan.config.middleware import ckan.model as model @@ -214,6 +217,49 @@ def teardown_class(cls): config.update(cls._original_config) +class RQTestBase(object): + ''' + Base class for tests of RQ functionality. + ''' + def setup(self): + u''' + Delete all RQ queues and jobs. + ''' + # See https://github.com/nvie/rq/issues/731 + redis_conn = connect_to_redis() + for queue in rq.Queue.all(connection=redis_conn): + queue.empty() + redis_conn.srem(rq.Queue.redis_queues_keys, queue._key) + redis_conn.delete(queue._key) + + def all_jobs(self): + u''' + Get a list of all RQ jobs. + ''' + jobs = [] + redis_conn = connect_to_redis() + for queue in rq.Queue.all(connection=redis_conn): + jobs.extend(queue.jobs) + return jobs + + def enqueue(self, job=None, *args, **kwargs): + u''' + Enqueue a test job. + ''' + if job is None: + job = jobs.test_job + return jobs.enqueue(job, *args, **kwargs) + + +class FunctionalRQTestBase(FunctionalTestBase, RQTestBase): + ''' + Base class for functional tests of RQ functionality. + ''' + def setup(self): + FunctionalTestBase.setup(self) + RQTestBase.setup(self) + + def submit_and_follow(app, form, extra_environ=None, name=None, value=None, **args): ''' @@ -324,7 +370,7 @@ def test_ckan_site_title(self): :param value: the new config key's value, e.g. ``'My Test CKAN'`` :type value: string - .. seealso:: :py:func:`changed_config` + .. seealso:: The context manager :py:func:`changed_config` ''' def decorator(func): @functools.wraps(func) @@ -349,7 +395,7 @@ def changed_config(key, value): with changed_config(u'ckan.site_title', u'My Test CKAN'): assert config[u'ckan.site_title'] == u'My Test CKAN' - .. seealso:: :py:func:`change_config` + .. 
seealso:: The decorator :py:func:`change_config` ''' _original_config = config.copy() config[key] = value diff --git a/ckan/tests/lib/test_jobs.py b/ckan/tests/lib/test_jobs.py index 8a194b8f7c1..95b7618b750 100644 --- a/ckan/tests/lib/test_jobs.py +++ b/ckan/tests/lib/test_jobs.py @@ -11,53 +11,10 @@ import rq import ckan.lib.jobs as jobs -from ckan.lib.redis import connect_to_redis -from ckan.tests.helpers import CapturingLogHandler, changed_config +from ckan.tests.helpers import CapturingLogHandler, changed_config, RQTestBase -def _delete_rq_queue(queue): - u''' - Delete an RQ queue. - - The queue is emptied before it is deleted. - - :param rq.queue.Queue queue: The RQ queue. - ''' - # See https://github.com/nvie/rq/issues/731 - queue.empty() - redis_conn = connect_to_redis() - redis_conn.srem(rq.Queue.redis_queues_keys, queue._key) - redis_conn.delete(queue._key) - - -class RQTests(object): - - def setUp(self): - # Delete all RQ queues - redis_conn = connect_to_redis() - for queue in rq.Queue.all(connection=redis_conn): - _delete_rq_queue(queue) - - def all_jobs(self): - u''' - Get a list of all RQ jobs. - ''' - jobs = [] - redis_conn = connect_to_redis() - for queue in rq.Queue.all(connection=redis_conn): - jobs.extend(queue.jobs) - return jobs - - def enqueue(self, job=None, *args, **kwargs): - u''' - Enqueue a test job. 
- ''' - if job is None: - job = jobs.test_job - return jobs.enqueue(job, *args, **kwargs) - - -class TestQueueNamePrefixes(RQTests): +class TestQueueNamePrefixes(RQTestBase): def test_queue_name_prefix_contains_site_id(self): prefix = jobs.add_queue_name_prefix(u'') @@ -73,7 +30,7 @@ def test_queue_name_removal_without_prefix(self): jobs.remove_queue_name_prefix(u'foobar') -class TestEnqueue(RQTests): +class TestEnqueue(RQTestBase): def test_enqueue_return_value(self): job = self.enqueue() @@ -114,7 +71,7 @@ def test_enqueue_queue(self): jobs.add_queue_name_prefix(u'my_queue')) -class TestGetAllQueues(RQTests): +class TestGetAllQueues(RQTestBase): def test_foreign_queues_are_ignored(self): u''' @@ -127,15 +84,15 @@ def test_foreign_queues_are_ignored(self): with changed_config(u'ckan.site_id', u'some-other-ckan-instance'): self.enqueue(queue=u'q2') # Create queue not related to CKAN - rq.Queue('q4').enqueue_call(jobs.test_job) + rq.Queue(u'q4').enqueue_call(jobs.test_job) all_queues = jobs.get_all_queues() names = {jobs.remove_queue_name_prefix(q.name) for q in all_queues} assert_equal(names, {u'q1', u'q2'}) -class TestGetQueue(RQTests): +class TestGetQueue(RQTestBase): - def test_get_queue_default_queue(self): + def test_get_queue_default_queue(self): u''' Test that the default queue is returned if no queue is given. 
''' @@ -151,20 +108,20 @@ def test_get_queue_other_queue(self): assert_equal(jobs.remove_queue_name_prefix(q.name), u'my_queue') -class TestFromID(RQTests): +class TestJobFromID(RQTestBase): - def test_from_id_existing(self): + def test_job_from_id_existing(self): job = self.enqueue() - assert_equal(jobs.from_id(job.id), job) + assert_equal(jobs.job_from_id(job.id), job) job = self.enqueue(queue=u'my_queue') - assert_equal(jobs.from_id(job.id), job) + assert_equal(jobs.job_from_id(job.id), job) @raises(KeyError) - def test_from_id_not_existing(self): - jobs.from_id(u'does-not-exist') + def test_job_from_id_not_existing(self): + jobs.job_from_id(u'does-not-exist') -class TestDictizeJob(RQTests): +class TestDictizeJob(RQTestBase): def test_dictize_job(self): job = self.enqueue(title=u'Title', queue=u'my_queue') @@ -172,7 +129,7 @@ def test_dictize_job(self): assert_equal(d[u'id'], job.id) assert_equal(d[u'title'], u'Title') assert_equal(d[u'queue'], u'my_queue') - dt = datetime.datetime.strptime(d[u'created'], u'%Y-%m-%dT%H:%M:%S.%f') + dt = datetime.datetime.strptime(d[u'created'], u'%Y-%m-%dT%H:%M:%S') now = datetime.datetime.utcnow() ok_(abs((now - dt).total_seconds()) < 10) @@ -184,7 +141,7 @@ def failing_job(): raise RuntimeError(u'JOB FAILURE') -class TestWorker(RQTests): +class TestWorker(RQTestBase): def test_worker_logging_lifecycle(self): u''' diff --git a/ckan/tests/logic/action/test_delete.py b/ckan/tests/logic/action/test_delete.py index ad9eba3ab90..72783dd5dc9 100644 --- a/ckan/tests/logic/action/test_delete.py +++ b/ckan/tests/logic/action/test_delete.py @@ -1,5 +1,7 @@ # encoding: utf-8 +import re + import nose.tools import ckan.tests.helpers as helpers @@ -7,10 +9,15 @@ import ckan.logic as logic import ckan.model as model import ckan.plugins as p +import ckan.lib.jobs as jobs import ckan.lib.search as search + assert_equals = nose.tools.assert_equals assert_raises = nose.tools.assert_raises +eq = nose.tools.eq_ +ok = nose.tools.ok_ +raises = 
nose.tools.raises class TestDelete: @@ -482,3 +489,57 @@ def test_missing_id_returns_error(self): def test_bad_id_returns_404(self): assert_raises(logic.NotFound, helpers.call_action, 'dataset_purge', id='123') + + +class TestJobClear(helpers.FunctionalRQTestBase): + + def test_all_queues(self): + ''' + Test clearing all queues. + ''' + self.enqueue() + self.enqueue(queue=u'q') + self.enqueue(queue=u'q') + self.enqueue(queue=u'q') + queues = helpers.call_action(u'job_clear') + eq({jobs.DEFAULT_QUEUE_NAME, u'q'}, set(queues)) + all_jobs = self.all_jobs() + eq(len(all_jobs), 0) + + def test_specific_queues(self): + ''' + Test clearing specific queues. + ''' + job1 = self.enqueue() + job2 = self.enqueue(queue=u'q1') + job3 = self.enqueue(queue=u'q1') + job4 = self.enqueue(queue=u'q2') + logs = helpers.CapturingLogHandler(u'ckan.logic') + queues = helpers.call_action(u'job_clear', queues=[u'q1', u'q2']) + eq({u'q1', u'q2'}, set(queues)) + all_jobs = self.all_jobs() + eq(len(all_jobs), 1) + eq(all_jobs[0], job1) + logs.assert_log(u'info', u'q1') + logs.assert_log(u'info', u'q2') + + +class TestJobCancel(helpers.FunctionalRQTestBase): + + def test_existing_job(self): + ''' + Test cancelling an existing job. 
+ ''' + job1 = self.enqueue(queue=u'q') + job2 = self.enqueue(queue=u'q') + logs = helpers.CapturingLogHandler(u'ckan.logic') + helpers.call_action(u'job_cancel', id=job1.id) + all_jobs = self.all_jobs() + eq(len(all_jobs), 1) + eq(all_jobs[0], job2) + assert_raises(KeyError, jobs.job_from_id, job1.id) + logs.assert_log(u'info', re.escape(job1.id)) + + @raises(logic.NotFound) + def test_not_existing_job(self): + helpers.call_action(u'job_cancel', id=u'does-not-exist') diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 4aa1051edee..0ec5b36d83f 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -1,5 +1,7 @@ # encoding: utf-8 +import datetime + import nose.tools import ckan.logic as logic @@ -11,6 +13,7 @@ eq = nose.tools.eq_ +ok = nose.tools.ok_ assert_raises = nose.tools.assert_raises @@ -2087,3 +2090,55 @@ def test_followee_list_with_q(self): eq(len(followee_list), 1) eq(followee_list[0]['display_name'], 'Environment') + + +class TestJobList(helpers.FunctionalRQTestBase): + + def test_all_queues(self): + ''' + Test getting jobs from all queues. + ''' + job1 = self.enqueue() + job2 = self.enqueue() + job3 = self.enqueue(queue=u'my_queue') + jobs = helpers.call_action(u'job_list') + eq(len(jobs), 3) + eq({job[u'id'] for job in jobs}, {job1.id, job2.id, job3.id}) + + def test_specific_queues(self): + ''' + Test getting jobs from specific queues. + ''' + job1 = self.enqueue() + job2 = self.enqueue(queue=u'q2') + job3 = self.enqueue(queue=u'q3') + job4 = self.enqueue(queue=u'q3') + jobs = helpers.call_action(u'job_list', queues=[u'q2']) + eq(len(jobs), 1) + eq(jobs[0][u'id'], job2.id) + jobs = helpers.call_action(u'job_list', queues=[u'q2', u'q3']) + eq(len(jobs), 3) + eq({job[u'id'] for job in jobs}, {job2.id, job3.id, job4.id}) + + +class TestJobShow(helpers.FunctionalRQTestBase): + + def test_existing_job(self): + ''' + Test showing an existing job. 
+ ''' + job = self.enqueue(queue=u'my_queue', title=u'Title') + d = helpers.call_action(u'job_show', id=job.id) + eq(d[u'id'], job.id) + eq(d[u'title'], u'Title') + eq(d[u'queue'], u'my_queue') + dt = datetime.datetime.strptime(d[u'created'], u'%Y-%m-%dT%H:%M:%S') + now = datetime.datetime.utcnow() + ok(abs((now - dt).total_seconds()) < 10) + + @nose.tools.raises(logic.NotFound) + def test_not_existing_job(self): + ''' + Test showing a not existing job. + ''' + helpers.call_action(u'job_show', id=u'does-not-exist') From 061ca311846462625dea0478a3ebf5131586dcfb Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Tue, 26 Jul 2016 15:35:39 +0200 Subject: [PATCH 38/65] [#2977] Tests for `paster jobs` command. --- ckan/lib/cli.py | 33 +++- ckan/tests/helpers.py | 130 +++++++++++-- ckan/tests/lib/test_cli.py | 248 ++++++++++++++++++++++++- ckan/tests/lib/test_jobs.py | 14 +- ckan/tests/logic/action/test_delete.py | 8 +- 5 files changed, 397 insertions(+), 36 deletions(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index e24156144d4..0bc9c00d3f3 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -11,20 +11,23 @@ import itertools import json import logging +import urlparse +from optparse import OptionConflictError + +import sqlalchemy as sa +import routes +import paste.script +from paste.registry import Registry +from paste.script.util.logging_config import fileConfig + import ckan.logic as logic import ckan.model as model import ckan.include.rjsmin as rjsmin import ckan.include.rcssmin as rcssmin import ckan.lib.fanstatic_resources as fanstatic_resources import ckan.plugins as p -import sqlalchemy as sa -import urlparse -import routes from ckan.common import config -import paste.script -from paste.registry import Registry -from paste.script.util.logging_config import fileConfig #NB No CKAN imports are allowed until after the config file is loaded. # i.e. do the imports in methods, after _load_config is called. 
@@ -2567,7 +2570,7 @@ class JobsCommand(CkanCommand): Usage: - paster jobs worker [QUEUES] + paster jobs worker [--burst] [QUEUES] Start a worker that fetches jobs from queues and executes them. If no queue names are given then the worker listens @@ -2585,6 +2588,9 @@ class JobsCommand(CkanCommand): paster jobs worker default my-custom-queue + If the `--burst` option is given then the worker will exit + as soon as all its queues are empty. + paster jobs list [QUEUES] List currently enqueued jobs from the given queues. If no queue @@ -2610,6 +2616,17 @@ class JobsCommand(CkanCommand): usage = __doc__ min_args = 0 + + def __init__(self, *args, **kwargs): + super(JobsCommand, self).__init__(*args, **kwargs) + try: + self.parser.add_option(u'--burst', action='store_true', + default=False, + help=u'Start worker in burst mode.') + except OptionConflictError: + # Option has already been added in previous call + pass + def command(self): self._load_config() try: @@ -2632,7 +2649,7 @@ def command(self): def worker(self): from ckan.lib.jobs import Worker - Worker(self.args).work() + Worker(self.args).work(burst=self.options.burst) def list(self): data_dict = { diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index ff60fd21eaa..c0e7e2752f5 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -22,9 +22,12 @@ import collections import contextlib +import errno import functools import logging +import os import re +import tempfile import webtest import nose.tools @@ -543,22 +546,88 @@ def wrapper(*args, **kwargs): return decorator -class CapturingLogHandler(logging.Handler): +@contextlib.contextmanager +def recorded_logs(logger=None, level=logging.DEBUG, + override_disabled=True, override_global_level=True): u''' - Log handler to check for expected logs. + Context manager for recording log messages. + + :param logger: The logger to record messages from. Can either be a + :py:class:`logging.Logger` instance or a string with the + logger's name. 
Defaults to the root logger. + + :param int level: Temporary log level for the target logger while + the context manager is active. Pass ``None`` if you don't want + the level to be changed. The level is automatically reset to its + original value when the context manager is left. + + :param bool override_disabled: A logger can be disabled by setting + its ``disabled`` attribute. By default, this context manager + sets that attribute to ``False`` at the beginning of its + execution and resets it when the context manager is left. Set + ``override_disabled`` to ``False`` to keep the current value + of the attribute. + + :param bool override_global_level: The ``logging.disable`` function + allows one to install a global minimum log level that takes + precedence over a logger's own level. By default, this context + manager makes sure that the global limit is at most ``level``, + and reduces it if necessary during its execution. Set + ``override_global_level`` to ``False`` to keep the global limit. + + :returns: A recording log handler that listens to ``logger`` during + the execution of the context manager. + :rtype: :py:class:`RecordingLogHandler` + + Example:: - Automatically attaches itself to the root logger or to ``logger`` if - given. ``logger`` can be a string with the logger's name or an - actual ``logging.Logger`` instance. 
+ import logging + + logger = logging.getLogger(__name__) + + with recorded_logs(logger) as logs: + logger.info(u'Hello, world!') + + logs.assert_log(u'info', u'world') ''' - def __init__(self, logger=None, *args, **kwargs): - super(CapturingLogHandler, self).__init__(*args, **kwargs) + if logger is None: + logger = logging.getLogger() + elif not isinstance(logger, logging.Logger): + logger = logging.getLogger(logger) + handler = RecordingLogHandler() + old_level = logger.level + manager_level = logger.manager.disable + disabled = logger.disabled + logger.addHandler(handler) + try: + if level is not None: + logger.setLevel(level) + if override_disabled: + logger.disabled = False + if override_global_level: + if (level is None) and (manager_level > old_level): + logger.manager.disable = old_level + elif (level is not None) and (manager_level > level): + logger.manager.disable = level + yield handler + finally: + logger.handlers.remove(handler) + logger.setLevel(old_level) + logger.disabled = disabled + logger.manager.disable = manager_level + + +class RecordingLogHandler(logging.Handler): + u''' + Log handler that records log messages for later inspection. + + You can inspect the recorded messages via the ``messages`` attribute + (a dict that maps log levels to lists of messages) or by using + ``assert_log``. + ''' + def __init__(self, *args, **kwargs): + super(RecordingLogHandler, self).__init__(*args, **kwargs) self.clear() - if logger is None: - logger = logging.getLogger() - elif not isinstance(logger, logging.Logger): - logger = logging.getLogger(logger) - logger.addHandler(self) def emit(self, record): self.messages[record.levelname.lower()].append(record.getMessage()) @@ -577,10 +646,19 @@ def assert_log(self, level, pattern, msg=None): :raises AssertionError: If the expected message was not logged. 
''' - pattern = re.compile(pattern) + compiled_pattern = re.compile(pattern) for log_msg in self.messages[level]: - if pattern.search(log_msg): + if compiled_pattern.search(log_msg): return + if not msg: + if self.messages[level]: + lines = u'\n '.join(self.messages[level]) + msg = (u'Pattern "{}" was not found in the log messages for ' + + u'level "{}":\n {}').format(pattern, level, lines) + else: + msg = (u'Pattern "{}" was not found in the log messages for ' + + u'level "{}" (no messages were recorded for that ' + + u'level).').format(pattern, level) raise AssertionError(msg) def clear(self): @@ -588,3 +666,27 @@ def clear(self): Clear all captured log messages. ''' self.messages = collections.defaultdict(list) + + +@contextlib.contextmanager +def temp_file(*args, **kwargs): + u''' + Context manager that provides a temporary file. + + The temporary file is named and open. It is automatically deleted + when the context manager is left if it still exists at that point. + + Any arguments are passed on to + :py:func:`tempfile.NamedTemporaryFile`. 
+ ''' + kwargs['delete'] = False + f = tempfile.NamedTemporaryFile(*args, **kwargs) + try: + yield f + finally: + f.close() + try: + os.remove(f.name) + except OSError as e: + if e.errno != errno.ENOENT: + raise diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py index c8ddd77222d..6a706cf3a8a 100644 --- a/ckan/tests/lib/test_cli.py +++ b/ckan/tests/lib/test_cli.py @@ -1,22 +1,77 @@ # encoding: utf-8 +import datetime import logging +import os +import os.path +from StringIO import StringIO +import sys -from nose.tools import assert_raises +from nose.tools import (assert_raises, eq_ as eq, ok_ as ok, assert_in, + assert_not_in, assert_not_equal as neq, assert_false as nok) +from pylons import config +from paste.script.command import run -from ckan.lib.cli import UserCmd +import ckan.lib.cli as cli +import ckan.lib.jobs as jobs import ckan.tests.helpers as helpers log = logging.getLogger(__name__) +def paster(*args, **kwargs): + ''' + Call a paster command. + + All arguments are parsed and passed on to the command. The + ``--config`` option is automatically appended. + + By default, an ``AssertionError`` is raised if the command exits + with a non-zero return code or if anything is written to STDERR. + Pass ``fail_on_error=False`` to disable this behavior. + + Example:: + + code, stdout, stderr = paster(u'jobs', u'list') + assert u'My Job Title' in stdout + + code, stdout, stderr = paster(u'jobs', u'foobar', + fail_on_error=False) + assert code == 1 + assert u'Unknown command' in stderr + + Any ``SystemExit`` raised by the command is swallowed. + + :returns: A tuple containing the return code, the content of + STDOUT, and the content of STDERR. 
+ ''' + fail_on_error = kwargs.pop(u'fail_on_error', True) + args = list(args) + [u'--config=' + config[u'__file__']] + sys.stdout, sys.stderr = StringIO(u''), StringIO(u'') + code = 0 + try: + run(args) + except SystemExit as e: + code = e.code + finally: + stdout, stderr = sys.stdout.getvalue(), sys.stderr.getvalue() + sys.stdout, sys.stderr = sys.__stdout__, sys.__stderr__ + if code != 0 and fail_on_error: + raise AssertionError(u'Paster command exited with non-zero ' + + u'return code {}: {}'.format(code, stderr)) + if stderr.strip() and fail_on_error: + raise AssertionError(u'Paster command wrote to STDERR: {}'.format( + stderr)) + return code, stdout, stderr + + class TestUserAdd(object): '''Tests for UserCmd.add''' @classmethod def setup_class(cls): - cls.user_cmd = UserCmd('user-command') + cls.user_cmd = cli.UserCmd('user-command') def setup(self): helpers.reset_db() @@ -70,3 +125,190 @@ def test_cli_user_add_unicode_fullname_system_exit(self): self.user_cmd.add() except SystemExit: assert False, "SystemExit exception shouldn't be raised" + + +class TestJobsUnknown(helpers.RQTestBase): + ''' + Test unknown sub-command for ``paster jobs``. + ''' + def test_unknown_command(self): + ''' + Test error handling for unknown ``paster jobs`` sub-command. + ''' + code, stdout, stderr = paster(u'jobs', u'does-not-exist', + fail_on_error=False) + neq(code, 0) + assert_in(u'Unknown command', stderr) + + +class TestJobsList(helpers.RQTestBase): + ''' + Tests for ``paster jobs list``. + ''' + def test_list_default_queue(self): + ''' + Test output of ``jobs list`` for default queue. 
+ ''' + job = self.enqueue() + stdout = paster(u'jobs', u'list')[1] + fields = stdout.split() + eq(len(fields), 3) + dt = datetime.datetime.strptime(fields[0], u'%Y-%m-%dT%H:%M:%S') + now = datetime.datetime.utcnow() + ok(abs((now - dt).total_seconds()) < 10) + eq(fields[1], job.id) + eq(fields[2], jobs.DEFAULT_QUEUE_NAME) + + def test_list_other_queue(self): + ''' + Test output of ``jobs.list`` for non-default queue. + ''' + job = self.enqueue(queue=u'my_queue') + stdout = paster(u'jobs', u'list')[1] + fields = stdout.split() + eq(len(fields), 3) + eq(fields[2], u'my_queue') + + def test_list_title(self): + ''' + Test title output of ``jobs list``. + ''' + job = self.enqueue(title=u'My_Title') + stdout = paster(u'jobs', u'list')[1] + fields = stdout.split() + eq(len(fields), 4) + eq(fields[3], u'"My_Title"') + + def test_list_filter(self): + ''' + Test filtering by queues for ``jobs list``. + ''' + job1 = self.enqueue(queue=u'q1') + job2 = self.enqueue(queue=u'q2') + job3 = self.enqueue(queue=u'q3') + stdout = paster(u'jobs', u'list', u'q1', u'q2')[1] + assert_in(u'q1', stdout) + assert_in(u'q2', stdout) + assert_not_in(u'q3', stdout) + + +class TestJobsCancel(helpers.RQTestBase): + ''' + Tests for ``paster jobs cancel``. + ''' + def test_cancel_existing(self): + ''' + Test ``jobs cancel`` for an existing job. + ''' + job1 = self.enqueue() + job2 = self.enqueue() + stdout = paster(u'jobs', u'cancel', job1.id)[1] + all_jobs = self.all_jobs() + eq(len(all_jobs), 1) + eq(all_jobs[0].id, job2.id) + assert_in(job1.id, stdout) + + def test_cancel_not_existing(self): + ''' + Test ``jobs cancel`` for a not existing job. + ''' + code, stdout, stderr = paster(u'jobs', u'cancel', u'does-not-exist', + fail_on_error=False) + neq(code, 0) + assert_in(u'does-not-exist', stderr) + + +class TestJobsClear(helpers.RQTestBase): + ''' + Tests for ``paster jobs clear``. + ''' + def test_clear_all_queues(self): + ''' + Test clearing all queues via ``jobs clear``. 
+ ''' + self.enqueue() + self.enqueue() + self.enqueue(queue=u'q1') + self.enqueue(queue=u'q2') + stdout = paster(u'jobs', u'clear')[1] + assert_in(jobs.DEFAULT_QUEUE_NAME, stdout) + assert_in(u'q1', stdout) + assert_in(u'q2', stdout) + eq(self.all_jobs(), []) + + def test_clear_specific_queues(self): + ''' + Test clearing specific queues via ``jobs clear``. + ''' + job1 = self.enqueue() + job2 = self.enqueue(queue=u'q1') + self.enqueue(queue=u'q2') + self.enqueue(queue=u'q2') + self.enqueue(queue=u'q3') + stdout = paster(u'jobs', u'clear', u'q2', u'q3')[1] + assert_in(u'q2', stdout) + assert_in(u'q3', stdout) + assert_not_in(jobs.DEFAULT_QUEUE_NAME, stdout) + assert_not_in(u'q1', stdout) + all_jobs = self.all_jobs() + eq(set(all_jobs), {job1, job2}) + + +class TestJobsTest(helpers.RQTestBase): + ''' + Tests for ``paster jobs test``. + ''' + def test_test_default_queue(self): + ''' + Test ``jobs test`` for the default queue. + ''' + stdout = paster(u'jobs', u'test')[1] + all_jobs = self.all_jobs() + eq(len(all_jobs), 1) + eq(jobs.remove_queue_name_prefix(all_jobs[0].origin), + jobs.DEFAULT_QUEUE_NAME) + + def test_test_specific_queues(self): + ''' + Test ``jobs test`` for specific queues. + ''' + stdout = paster(u'jobs', u'test', u'q1', u'q2')[1] + all_jobs = self.all_jobs() + eq(len(all_jobs), 2) + eq({jobs.remove_queue_name_prefix(j.origin) for j in all_jobs}, + {u'q1', u'q2'}) + + +class TestJobsWorker(helpers.RQTestBase): + ''' + Tests for ``paster jobs worker``. + ''' + # All tests of ``jobs worker`` must use the ``--burst`` option to + # make sure that the worker exits. + + def test_worker_default_queue(self): + ''' + Test ``jobs worker`` with the default queue. + ''' + with helpers.temp_file() as f: + self.enqueue(os.remove, args=[f.name]) + paster(u'jobs', u'worker', u'--burst') + all_jobs = self.all_jobs() + eq(all_jobs, []) + nok(os.path.isfile(f.name)) + + def test_worker_specific_queues(self): + ''' + Test ``jobs worker`` with specific queues. 
+ ''' + with helpers.temp_file() as f: + with helpers.temp_file() as g: + job1 = self.enqueue() + job2 = self.enqueue(queue=u'q2') + self.enqueue(os.remove, args=[f.name], queue=u'q3') + self.enqueue(os.remove, args=[g.name], queue=u'q4') + paster(u'jobs', u'worker', u'--burst', u'q3', u'q4') + all_jobs = self.all_jobs() + eq(set(all_jobs), {job1, job2}) + nok(os.path.isfile(f.name)) + nok(os.path.isfile(g.name)) diff --git a/ckan/tests/lib/test_jobs.py b/ckan/tests/lib/test_jobs.py index 95b7618b750..ed842ffa4e7 100644 --- a/ckan/tests/lib/test_jobs.py +++ b/ckan/tests/lib/test_jobs.py @@ -11,7 +11,7 @@ import rq import ckan.lib.jobs as jobs -from ckan.tests.helpers import CapturingLogHandler, changed_config, RQTestBase +from ckan.tests.helpers import changed_config, recorded_logs, RQTestBase class TestQueueNamePrefixes(RQTestBase): @@ -149,9 +149,9 @@ def test_worker_logging_lifecycle(self): ''' queue = u'my_queue' job = self.enqueue(queue=queue) - logs = CapturingLogHandler(u'ckan.lib.jobs') - worker = jobs.Worker([queue]) - worker.work(burst=True) + with recorded_logs(u'ckan.lib.jobs') as logs: + worker = jobs.Worker([queue]) + worker.work(burst=True) messages = logs.messages[u'info'] # We expect 4 log messages: Worker start, job start, job end, # worker end. @@ -169,16 +169,16 @@ def test_worker_exception_logging(self): Test that exceptions in a job are logged. 
''' job = self.enqueue(failing_job) - logs = CapturingLogHandler(u'ckan.lib.jobs') worker = jobs.Worker() # Prevent worker from forking so that we can capture log # messages from within the job def execute_job(*args, **kwargs): return worker.perform_job(*args, **kwargs) - worker.execute_job = execute_job - worker.work(burst=True) + worker.execute_job = execute_job + with recorded_logs(u'ckan.lib.jobs') as logs: + worker.work(burst=True) logs.assert_log(u'error', u'JOB FAILURE') def test_worker_default_queue(self): diff --git a/ckan/tests/logic/action/test_delete.py b/ckan/tests/logic/action/test_delete.py index 72783dd5dc9..6563cb926b5 100644 --- a/ckan/tests/logic/action/test_delete.py +++ b/ckan/tests/logic/action/test_delete.py @@ -514,8 +514,8 @@ def test_specific_queues(self): job2 = self.enqueue(queue=u'q1') job3 = self.enqueue(queue=u'q1') job4 = self.enqueue(queue=u'q2') - logs = helpers.CapturingLogHandler(u'ckan.logic') - queues = helpers.call_action(u'job_clear', queues=[u'q1', u'q2']) + with helpers.recorded_logs(u'ckan.logic') as logs: + queues = helpers.call_action(u'job_clear', queues=[u'q1', u'q2']) eq({u'q1', u'q2'}, set(queues)) all_jobs = self.all_jobs() eq(len(all_jobs), 1) @@ -532,8 +532,8 @@ def test_existing_job(self): ''' job1 = self.enqueue(queue=u'q') job2 = self.enqueue(queue=u'q') - logs = helpers.CapturingLogHandler(u'ckan.logic') - helpers.call_action(u'job_cancel', id=job1.id) + with helpers.recorded_logs(u'ckan.logic') as logs: + helpers.call_action(u'job_cancel', id=job1.id) all_jobs = self.all_jobs() eq(len(all_jobs), 1) eq(all_jobs[0], job2) From c8a8553de1e976fa9113f03a2f8bc1da0d8187f6 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Wed, 27 Jul 2016 14:57:33 +0200 Subject: [PATCH 39/65] [#2977] Add `paster jobs show` command. Useful for displaying details about a single job. Corresponds to the job_show API function. 
--- ckan/lib/cli.py | 23 +++++++++++++++++++++++ ckan/tests/lib/test_cli.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 0bc9c00d3f3..691e5a5e606 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -2596,6 +2596,10 @@ class JobsCommand(CkanCommand): List currently enqueued jobs from the given queues. If no queue names are given then the jobs from all queues are listed. + paster jobs show ID + + Show details about a specific job. + paster jobs cancel ID Cancel a specific job. @@ -2638,6 +2642,8 @@ def command(self): self.worker() elif cmd == u'list': self.list() + elif cmd == u'show': + self.show() elif cmd == u'cancel': self.cancel() elif cmd == u'clear': @@ -2663,6 +2669,23 @@ def list(self): job[u'title'] = u'"{}"'.format(job[u'title']) print(u'{created} {id} {queue} {title}'.format(**job)) + def show(self): + if not self.args: + error(u'You must specify a job ID') + id = self.args[0] + try: + job = p.toolkit.get_action(u'job_show')({}, {u'id': id}) + except logic.NotFound: + error(u'There is no job with ID "{}"'.format(id)) + print(u'ID: {}'.format(job[u'id'])) + if job[u'title'] is None: + title = u'None' + else: + title = u'"{}"'.format(job[u'title']) + print(u'Title: {}'.format(title)) + print(u'Created: {}'.format(job[u'created'])) + print(u'Queue: {}'.format(job[u'queue'])) + def cancel(self): if not self.args: error(u'You must specify a job ID') diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py index 6a706cf3a8a..cd6e2e03b9c 100644 --- a/ckan/tests/lib/test_cli.py +++ b/ckan/tests/lib/test_cli.py @@ -192,6 +192,28 @@ def test_list_filter(self): assert_not_in(u'q3', stdout) +class TestJobShow(helpers.RQTestBase): + ''' + Tests for ``paster jobs show``. + ''' + def test_show_existing(self): + ''' + Test ``jobs show`` for an existing job. 
+ ''' + job = self.enqueue(queue=u'my_queue', title=u'My Title') + stdout = paster(u'jobs', u'show', job.id)[1] + assert_in(job.id, stdout) + assert_in(jobs.remove_queue_name_prefix(job.origin), stdout) + + def test_show_missing_id(self): + ''' + Test ``jobs show`` with a missing ID. + ''' + code, stdout, stderr = paster(u'jobs', u'show', fail_on_error=False) + neq(code, 0) + ok(stderr) + + class TestJobsCancel(helpers.RQTestBase): ''' Tests for ``paster jobs cancel``. @@ -217,6 +239,14 @@ def test_cancel_not_existing(self): neq(code, 0) assert_in(u'does-not-exist', stderr) + def test_cancel_missing_id(self): + ''' + Test ``jobs cancel`` with a missing ID. + ''' + code, stdout, stderr = paster(u'jobs', u'cancel', fail_on_error=False) + neq(code, 0) + ok(stderr) + class TestJobsClear(helpers.RQTestBase): ''' From a6788dbdc36a88d4201e6b3a67292adb6712c420 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Thu, 28 Jul 2016 09:22:50 +0200 Subject: [PATCH 40/65] [#2977] Document new background job system. --- ckan/config/celery-supervisor.conf | 31 -- ckan/config/supervisor-ckan-worker.conf | 43 +++ ckan/lib/cli.py | 6 +- doc/api/index.rst | 2 + doc/extensions/best-practices.rst | 4 + doc/maintaining/background-tasks.rst | 412 +++++++++++++++------- doc/maintaining/installing/deployment.rst | 11 + doc/maintaining/paster.rst | 122 ++++++- 8 files changed, 466 insertions(+), 165 deletions(-) delete mode 100644 ckan/config/celery-supervisor.conf create mode 100644 ckan/config/supervisor-ckan-worker.conf diff --git a/ckan/config/celery-supervisor.conf b/ckan/config/celery-supervisor.conf deleted file mode 100644 index a70ed812803..00000000000 --- a/ckan/config/celery-supervisor.conf +++ /dev/null @@ -1,31 +0,0 @@ -; =============================== -; ckan celeryd supervisor example -; =============================== - -; symlink or copy this file to /etc/supervisr/conf.d -; change the path/to/virtualenv below to the virtualenv ckan is in. 
- - -[program:celery] -; Full Path to executable, should be path to virtural environment, -; Full path to config file too. - -command=/path/to/pyenv/bin/paster --plugin=ckan celeryd --config=/path/to/config/testing.ini - -; user that owns virtual environment. -user=ckan - -numprocs=1 -stdout_logfile=/var/log/celeryd.log -stderr_logfile=/var/log/celeryd.log -autostart=true -autorestart=true -startsecs=10 - -; Need to wait for currently executing tasks to finish at shutdown. -; Increase this if you have very long running tasks. -stopwaitsecs = 600 - -; if rabbitmq is supervised, set its priority higher -; so it starts first -priority=998 diff --git a/ckan/config/supervisor-ckan-worker.conf b/ckan/config/supervisor-ckan-worker.conf new file mode 100644 index 00000000000..1cf2ffb460a --- /dev/null +++ b/ckan/config/supervisor-ckan-worker.conf @@ -0,0 +1,43 @@ +; ======================================================= +; Supervisor configuration for CKAN background job worker +; ======================================================= + +; 1. Copy this file to /etc/supervisr/conf.d +; 2. Make sure the paths below match your setup + + +[program:ckan-worker] + +; Use the full paths to the virtualenv and your configuration file here. +command=/usr/lib/ckan/default/bin/paster --plugin=ckan jobs worker --config=/etc/ckan/default/production.ini + + +; User the worker runs as. +user=www-data + + +; Start just a single worker. Increase this number if you have many or +; particularly long running background jobs. +numprocs=1 +process_name=%(program_name)s-%(process_num)02d + + +; Log files. +stdout_logfile=/var/log/ckan-worker.log +stderr_logfile=/var/log/ckan-worker.log + + +; Make sure that the worker is started on system start and automatically +; restarted if it crashes unexpectedly. +autostart=true +autorestart=true + + +; Number of seconds the process has to run before it is considered to have +; started successfully. 
+startsecs=10 + +; Need to wait for currently executing tasks to finish at shutdown. +; Increase this if you have very long running tasks. +stopwaitsecs = 600 + diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 691e5a5e606..888889a8fee 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -947,7 +947,7 @@ def purge(self, dataset_ref): class Celery(CkanCommand): - '''Celery daemon + '''Celery daemon [DEPRECATED] This command is DEPRECATED, use `paster jobs` instead. @@ -2602,7 +2602,9 @@ class JobsCommand(CkanCommand): paster jobs cancel ID - Cancel a specific job. + Cancel a specific job. Jobs can only be canceled while they are + enqueued. Once a worker has started executing a job it cannot + be aborted anymore. paster jobs clear [QUEUES] diff --git a/doc/api/index.rst b/doc/api/index.rst index fac6ede30ec..c5b2bce8848 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -1,3 +1,5 @@ +.. _action api: + ========= API guide ========= diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index 5fe67f5ee94..b1b7e7d1f6a 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -74,6 +74,10 @@ the name of your extension. For example: with the name of your extension. For example ``my_extension_foobarize_everything``. +* The names of *background job queues* introduced by your extension should + begin with the name of your extension. For example + ``my_extension:super-special-job-queue``. + In some situations, resources like databases may even be shared between multiple CKAN *instances*, which requires an even higher degree of uniqueness for the corresponding names. In that case, you should also prefix your diff --git a/doc/maintaining/background-tasks.rst b/doc/maintaining/background-tasks.rst index db43cdc4570..790782609e3 100644 --- a/doc/maintaining/background-tasks.rst +++ b/doc/maintaining/background-tasks.rst @@ -1,175 +1,349 @@ -================ -Background tasks -================ +.. 
_background jobs: -.. version-added: 1.5.1 - -CKAN allows you to create tasks that run in the 'background', that is -asynchronously and without blocking the main application (these tasks can also -be automatically retried in the case of transient failures). Such tasks can be +=============== +Background jobs +=============== +CKAN allows you to create jobs that run in the 'background', i.e. +asynchronously and without blocking the main application. Such jobs can be created in :doc:`Extensions ` or in core CKAN. -Background tasks can be essential to providing certain kinds of functionality, +Background jobs can be essential to providing certain kinds of functionality, for example: -* Creating webhooks that notify other services when certain changes occur (for +* Creating web-hooks that notify other services when certain changes occur (for example a dataset is updated) + * Performing processing or validation or on data (as done by the Archiver and DataStorer Extensions) +Basically, any piece of work that takes too long to perform while the main +application is waiting is a good candidate for a background job. + +.. note:: + + The current background job system is based on RQ_ and was introduced in + CKAN 2.6. See :ref:`background jobs migration` for details on how to + migrate your jobs from the previous system introduced in CKAN 1.5. + + .. _RQ: http://python-rq.org + + +.. _background jobs writing: + +Writing and enqueuing background jobs +===================================== + +.. note:: + + This section is only relevant for developers working on CKAN or an + extension. + +The core of a background job is a regular Python function. For example, here's +a very simply job function that logs a message:: + + import logging + + def log_job(msg, level=logging.INFO, logger=u'ckan'): + u''' + Background job to log a message. + ''' + logger = logging.getLogger(logger) + logger.log(level, msg) + + +And that's it. Your job function can use all the usual Python features. 
Just +keep in mind that your function will be run in a separate process by a +:ref:`worker `, so your function should not depend on +the current state of global variables, etc. Ideally your job function should +receive all the information it needs via its arguments. + +In addition, the module that contains your job function must be importable by +the worker, which must also be able to get the function from its module. This +means that nested functions, lambdas and instance methods cannot be used as job +functions. While class methods of top-level classes can be used it's best to +stick to ordinary module-level functions. + +.. note:: + + Background jobs do not support return values (since they run asynchronously + there is no place to return those values to). If your job function produces + a result then it needs to store that result, for example in a file or in + CKAN's database. + +Once you have a job function, all you need to do is to use +``ckan.lib.jobs.enqueue`` to create an actual job out of it:: + + import ckan.lib.jobs as jobs + + jobs.enqueue(log_job, [u'My log message']) + +This will place a job on the :ref:`job queue ` where it +can be picked up and executed by a worker. + +The first argument to ``enqueue`` is the job function to use. The second is a +list of the arguments which should be passed to the function. You can omit it +in which case no arguments will be passed. You can also pass keyword arguments +in a dict as the third argument:: + + jobs.enqueue(log_job, [u'My log message'], {u'logger': u'ckanext.foo'}) + +You can also give the job a title which can be useful for identifying it when +:ref:`managing the job queue `:: + + jobs.enqueue(log_job, [u'My log message'], title=u'My log job') + + +.. _background jobs workers: + +Running background jobs +======================= +Jobs are placed on the :ref:`job queue `, from which +they can be retrieved and executed. 
Since jobs are designed to run +asynchronously that happens in a separate process called a *worker*. + +After it has been started, a worker listens on the queue until a job is +enqueued. The worker then removes the job from the queue and executes it. +Afterwards the worker waits again for the next job to be enqueued. + +.. note:: + + Executed jobs are discarded. In particular, no information about past jobs + is kept. + +Workers can be started using the :ref:`paster jobs worker` command:: + + paster --plugin=ckan jobs worker --config=/etc/ckan/default/development.ini + +The worker process will run indefinitely (you can stop it using ``CTRL+C``). + +.. note:: + + You can run multiple workers if your setup uses many or particularly long + background jobs. + + +.. _background jobs supervisor: + +Using Supervisor +^^^^^^^^^^^^^^^^ +In a production setting, the worker should be run in a more robust way. One +possibility is to use Supervisor_. + +.. _Supervisor: http://supervisord.org/ + +First install Supervisor:: + + sudo apt-get install supervisor + +Next copy the configuration file template:: + + sudo cp /usr/lib/ckan/default/src/ckan/ckan/config/supervisor-ckan-worker.conf /etc/supervisor/conf.d + +Open ``/etc/supervisor/conf.d/supervisor-ckan-worker.conf`` in your favourite +text editor and make sure all the settings suit your needs. If you installed +CKAN in a non-default location (somewhere other than ``/usr/lib/ckan/default``) +then you will need to update the paths in the config file (see the comments in +the file for details). + +Restart Supervisor:: + + sudo service supervisor restart + +The worker should now be running. 
To check its status, use + +:: + + sudo supervisorctl status + +You can restart the worker via + +:: + + sudo supervisorctl restart ckan-worker:* + +To test that background jobs are processed correctly you can enqueue a test job +via + +:: + + paster --plugin=ckan jobs test -c /etc/ckan/default/production.ini + +The worker's log (``/var/log/ckan-worker.log``) should then show how the job +was processed by the worker. + +In case you run into problems, make sure to check the logs of Supervisor and +the worker:: + + cat /var/log/supervisor/supervisord.log + cat /var/log/ckan-worker.log + + + +.. _background jobs management: + +Managing background jobs +======================== +Once they are enqueued, background jobs can be managed via +:ref:`paster ` and the :ref:`web API `. -Enabling background tasks -========================= +List enqueues jobs +^^^^^^^^^^^^^^^^^^ +* :ref:`paster jobs list ` +* :py:func:`ckan.logic.action.get.job_list` -To manage and run background tasks requires a job queue and CKAN uses celery_ -(plus the CKAN database) for this purpose. Thus, to use background tasks you -need to install and run celery_. +Show details about a job +^^^^^^^^^^^^^^^^^^^^^^^^ +* :ref:`paster jobs show ` +* :py:func:`ckan.logic.action.get.job_show` -Installation of celery_ will normally be taken care of by whichever component -or extension utilizes it so we skip that here. +Cancel a job +^^^^^^^^^^^^ +A job that hasn't been processed yet can be canceled via -.. _celery: http://celeryproject.org/ +* :ref:`paster jobs cancel ` +* :py:func:`ckan.logic.action.delete.job_cancel` -To run the celery daemon you have two options: +Clear all enqueued jobs +^^^^^^^^^^^^^^^^^^^^^^^ +* :ref:`paster jobs clear ` +* :py:func:`ckan.logic.action.delete.job_clear` -1. In development setup you can just use paster. This can be done as simply - as:: +Logging +^^^^^^^ +Information about enqueued and processed background jobs is automatically +logged to the CKAN logs. 
You may need to update your logging configuration to +record messages at the *INFO* level for the messages to be stored. - paster celeryd +.. _background jobs queues: - This only works if you have a ``development.ini`` file in ckan root. +Background job queues +===================== +By default, all functionality related to background jobs uses a single job +queue that is specific to the current CKAN instance. However, in some +situations it is useful to have more than one queue. For example, you might +want to distinguish between short, urgent jobs and longer, less urgent ones. +The urgent jobs should be processed even if a long and less urgent job is +already running. -2. In production, the daemon should be run with a different ini file and be run - as an init script. The simplest way to do this is to install supervisor:: +For such scenarios, the job system supports multiple queues. To use a different +queue, all you have to do is pass the (arbitrary) queue name. For example, to +enqueue a job at a non-default queue:: - apt-get install supervisor + jobs.enqueue(log_job, [u"I'm from a different queue!"], + queue=u'my-own-queue') - Using this file as a template and copy to ``/etc/supservisor/conf.d``:: +Similarly, to start a worker that only listens to the queue you just posted a +job to:: - https://github.com/ckan/ckan/blob/master/ckan/config/celery-supervisor.conf + paster --plugin=ckan jobs worker my-own-queue --config=/etc/ckan/default/development.ini - Alternatively, you can run:: +See the documentation of the various functions and commands for details on how +to use non-standard queues. - paster celeryd --config=/path/to/file.ini +.. note:: + If you create a custom queue in your extension then you should prefix the + queue name using your extension's name. See :ref:`avoid name clashes`. 
-Writing background tasks -========================== + Queue names are internally automatically prefixed with the CKAN site ID, + so multiple parallel CKAN instances are not a problem. -These instructions should show you how to write an background task and how to -call it from inside CKAN or another extension using celery. -Examples --------- +.. _background jobs migration: -Here are some existing real examples of writing CKAN tasks: +Migrating from CKAN's previous background job system +==================================================== +Before version 2.6 (starting from 1.5), CKAN offered a different background job +system built around Celery_. That system is still available but deprecated and +will be removed in future versions of CKAN. You should therefore update your +code to use the new system described above. -* https://github.com/ckan/ckanext-archiver -* https://github.com/ckan/ckanext-qa -* https://github.com/ckan/ckanext-datastorer +.. _Celery: http://celeryproject.org/ -Setup ------ +Migrating existing job functions is easy. In the old system, a job function +would look like this:: -An entry point is required inside the ``setup.py`` for your extension, and so -you should add something resembling the following that points to a function in -a module. In this case the function is called task_imports in the -``ckanext.NAME.celery_import`` module:: + @celery.task(name=u'my_extension.echofunction') + def echo(message): + print message - entry_points = """ - [ckan.celery_task] - tasks = ckanext.NAME.celery_import:task_imports - """ +As :ref:`described above `, under the new system the +same function would be simply written as -The function, in this case ``task_imports`` should be a function that returns -fully qualified module paths to modules that contain the defined task (see the -next section). 
In this case we will put all of our tasks in a file called -``tasks.py`` and so ``task_imports`` should be in a file called -``ckanext/NAME/celery_import.py``:: +:: - def task_imports(): - return ['ckanext.NAME.tasks'] + def echo(message): + print message -This returns an iterable of all of the places to look to find tasks, in this -example we are only putting them in one place. +There is no need for a special decorator. In the new system there is also no +need for registering your tasks via ``setup.py``. +Migrating the code that enqueues a task is also easy. Previously it would look +like this:: -Implementing the tasks ----------------------- + celery.send_task(u'my_extension.echofunction', args=[u'Hello World'], + task_id=str(uuid.uuid4())) -The most straightforward way of defining tasks in our ``tasks.py`` module, is -to use the decorators provided by celery. These decorators make it easy to just -define a function and then give it a name and make it accessible to celery. -Make sure you import celery from ckan.lib.celery_app:: +With the new system, it looks as follows:: - from ckan.lib.celery_app import celery + import ckan.lib.jobs as jobs -Implement your function, specifying the arguments you wish it to take. For our -sample we will use a simple echo task that will print out its argument to the -console:: + jobs.enqueue(ckanext.my_extension.plugin.echo, [u'Hello World']) - def echo( message ): - print message +As you can see, the new system does not use strings to identify job functions +but uses the functions directly instead. There is also no need for creating a +job ID, that will be done automatically for you. -Next it is important to decorate your function with the celery task decorator. 
-You should give the task a name, which is used later on when calling the task:: - @celery.task(name = "NAME.echofunction") - def echo( message ): - print message +Supporting both systems at once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Not all CKAN installations will immediately update to CKAN 2.6. It might +therefore make sense for you to support both the new and the old job system. +That way you are ready when the old system is removed but can continue to +support older CKAN installations. -That's it, your function is ready to be run asynchronously outside of the main -execution of the CKAN app. Next you should make sure you run ``python setup.py -develop`` in your extensions folder and then go to your CKAN installation -folder (normally pyenv/src/ckan/) to run the following command:: +Such a setup might look as follows. First split your Celery-based job +functions into the job itself and its Celery handler. That is, change - paster celeryd +:: -Once you have done this your task name ``NAME.echofunction`` should appear in -the list of tasks loaded. If it is there then you are all set and ready to go. -If not then you should try the following to try and resolve the problem: + @celery.task(name=u'my_extension.echofunction') + def echo(message): + print message -1. Make sure the entry point is defined correctly in your ``setup.py`` and that - you have executed ``python setup.py develop`` -2. Check that your task_imports function returns an iterable with valid module - names in -3. Ensure that the decorator marks the functions (if there is more than one - decorator, make sure the celery.task is the first one - which means it will - execute last). -4. If none of the above helps, go into #ckan on irc.freenode.net where there - should be people who can help you resolve your issue. +to -Calling the task ----------------- +:: -Now that the task is defined, and has been loaded by celery it is ready to be -called. 
To call a background task you need to know only the name of the task, -and the arguments that it expects as well as providing it a task id.:: + def echo(message): + print message - import uuid - from ckan.lib.celery_app import celery - celery.send_task("NAME.echofunction", args=["Hello World"], task_id=str(uuid.uuid4())) + @celery.task(name=u'my_extension.echofunction') + def echo_celery(*args, **kwargs): + echo(*args, **kwargs) -After executing this code you should see the message printed in the console -where you ran ``paster celeryd``. +That way, you can call ``echo`` using the new system and use the name for +Celery. +Then use the new system if it is available and fall back to Celery otherwise:: -Retrying on errors ------------------- + def compat_enqueue(name, fn, args=None): + u''' + Enqueue a background job using Celery or RQ. + ''' + try: + # Try to use RQ + from ckan.lib.jobs import enqueue + enqueue(fn, args=args) + except ImportError: + # Fallback to Celery + import uuid + from ckan.lib.celery_app import celery + celery.send_task(name, args=args, task_id=str(uuid.uuid4())) -Should your task fail to complete because of a transient error, it is possible -to ask celery to retry the task, after some period of time. The default wait -before retrying is three minutes, but you can optionally specify this in the -call to retry via the countdown parameter, and you can also specify the -exception that triggered the failure. For our example the call to retry would -look like the following - note that it calls the function name, not the task -name given in the decorator:: +Use that function as follows for enqueuing a job:: - try: - ... some work that may fail, http request? 
- except Exception, e: - # Retry again in 2 minutes - echo.retry(args=(message), exc=e, countdown=120, max_retries=10) + compat_enqueue(u'my_extension.echofunction', + ckanext.my_extension.plugin.echo, + [u'Hello World']) -If you don't want to wait a period of time you can use the eta datetime -parameter to specify an explicit time to run the task (i.e. 9AM tomorrow) diff --git a/doc/maintaining/installing/deployment.rst b/doc/maintaining/installing/deployment.rst index d99decffcd3..3f452072339 100644 --- a/doc/maintaining/installing/deployment.rst +++ b/doc/maintaining/installing/deployment.rst @@ -248,6 +248,17 @@ You should now be able to visit your server in a web browser and see your new CKAN instance. +-------------------------------------- +10. Setup a worker for background jobs +-------------------------------------- +CKAN uses asynchronous :ref:`background jobs` for long tasks. These jobs are +executed by a separate process which is called a :ref:`worker `. + +To run the worker in a robust way, :ref:`install and configure Supervisor +`. + + --------------- Troubleshooting --------------- diff --git a/doc/maintaining/paster.rst b/doc/maintaining/paster.rst index 3ec72316443..748c9feaedd 100644 --- a/doc/maintaining/paster.rst +++ b/doc/maintaining/paster.rst @@ -1,3 +1,5 @@ +.. _paster: + ====================== Command Line Interface ====================== @@ -169,7 +171,6 @@ Paster Commands Reference The following paster commands are supported by CKAN: ================= ============================================================ -celeryd Control celery daemon. check-po-files Check po files for common mistakes color Create or remove a color scheme. create-test-data Create test data in the database. @@ -177,6 +178,7 @@ dataset Manage datasets. datastore Perform commands to set up the datastore. db Perform various tasks on the database. 
front-end-build Creates and minifies css and JavaScript files +jobs Manage background jobs less Compile all root less documents into their CSS counterparts minify Create minified versions of the given Javascript and CSS files. notify Send out modification notifications. @@ -192,18 +194,6 @@ user Manage users. ================= ============================================================ -celeryd: Control celery daemon -============================== - -Usage:: - - celeryd - run the celery daemon - celeryd run concurrency - run the celery daemon with - argument 'concurrency' - celeryd view - view all tasks in the queue - celeryd clean - delete all tasks in the queue - - check-po-files: Check po files for common mistakes ================================================== @@ -268,6 +258,112 @@ Usage:: front-end-build +.. _paster jobs: + +jobs: Manage background jobs +============================ + +The ``jobs`` command can be used to manage :ref:`background jobs`. + + +.. _paster jobs worker: + +Run a background job worker +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + paster jobs worker [--burst] [QUEUES] + +Starts a worker that fetches job from the :ref:`job queues ` and executes them. If no queue names are given then it listens to +the default queue. This is equivalent to + +:: + + paster jobs worker default + +If queue names are given then the worker listens to those queues and only +those:: + + paster jobs worker my-custom-queue another-special-queue + +Hence, if you want the worker to listen to the default queue and some others +then you must list the default queue explicitly:: + + paster jobs worker default my-custom-queue + +If the ``--burst`` option is given then the worker will exit as soon as all its +queues are empty. Otherwise it will wait indefinitely until a new job is +enqueued (this is the default). + +.. note:: + + In a production setting you should :ref:`use a more robust way of running + background workers `. + + +.. 
_paster jobs list: + +List enqueued jobs +^^^^^^^^^^^^^^^^^^ +:: + + paster jobs list [QUEUES] + +Lists the currently enqueued jobs from the given :ref:`job queues `. If no queue names are given then the jobs from all queues are +listed. + + +.. _paster jobs show: + +Show details about a job +^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + paster jobs show ID + +Shows details about the enqueued job with the given ID. + + +.. _paster jobs cancel: + +Cancel a job +^^^^^^^^^^^^ +:: + + paster jobs cancel ID + +Cancels the enqueued job with the given ID. Jobs can only be canceled while +they are enqueued. Once a worker has started executing a job it cannot be +aborted anymore. + + +.. _paster jobs clear: + +Clear job queues +^^^^^^^^^^^^^^^^ +:: + + paster jobs clear [QUEUES] + +Cancels all jobs on the given :ref:`job queues `. If no +queues are given then *all* queues are cleared. + + +.. _paster jobs test: + +Enqueue a test job +^^^^^^^^^^^^^^^^^^ +:: + + paster jobs test [QUEUES] + +Enqueues a test job. If no :ref:`job queues ` are given +then the job is added to the default queue. If queue names are given then a +separate test job is added to each of the queues. + + .. _less: less: Compile all root less documents into their CSS counterparts From d0b56a6896038cd475e26fd94581dc784e9a86f2 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Thu, 28 Jul 2016 09:24:27 +0200 Subject: [PATCH 41/65] [#2977] Increase verbosity of background job worker output. This output doesn't go into the CKAN logs but into a separate worker log (`/var/log/ckan-worker.log` if the default Supervisor configuration is used). 
--- ckan/lib/jobs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index ef1c1e36f7d..3282802b896 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -19,7 +19,7 @@ import logging from pylons import config -from rq import Queue, Worker as RqWorker +import rq from rq.connections import push_connection from rq.exceptions import NoSuchJobError from rq.job import Job @@ -89,7 +89,7 @@ def get_all_queues(): ''' redis_conn = _connect() prefix = _get_queue_name_prefix() - return [q for q in Queue.all(connection=redis_conn) if + return [q for q in rq.Queue.all(connection=redis_conn) if q.name.startswith(prefix)] @@ -114,7 +114,7 @@ def get_queue(name=DEFAULT_QUEUE_NAME): except KeyError: log.debug(u'Initializing background job queue "{}"'.format(name)) redis_conn = _connect() - queue = _queues[fullname] = Queue(fullname, connection=redis_conn) + queue = _queues[fullname] = rq.Queue(fullname, connection=redis_conn) return queue @@ -200,7 +200,7 @@ def test_job(*args): print(args) -class Worker(RqWorker): +class Worker(rq.Worker): u''' CKAN-specific worker. ''' @@ -218,6 +218,7 @@ def __init__(self, queues=None, *args, **kwargs): ''' queues = queues or [DEFAULT_QUEUE_NAME] queues = [get_queue(q) for q in ensure_list(queues)] + rq.worker.logger.setLevel(logging.INFO) super(Worker, self).__init__(queues, *args, **kwargs) def register_birth(self, *args, **kwargs): From 958db3acc5180f4dafca873a366eb9aeb77ee335 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Thu, 28 Jul 2016 09:45:40 +0200 Subject: [PATCH 42/65] [#2977] Document in which CKAN version new features were added. 
--- ckan/lib/jobs.py | 2 ++ ckan/lib/redis.py | 2 ++ ckan/logic/action/delete.py | 4 ++++ ckan/logic/action/get.py | 4 ++++ doc/maintaining/configuration.rst | 2 ++ doc/maintaining/paster.rst | 2 ++ 6 files changed, 16 insertions(+) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index 3282802b896..3223f948b0e 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -14,6 +14,8 @@ raw RQ objects (e.g. a queue returned by ``get_queue``) use the full, prefixed names. Use the functions ``add_queue_name_prefix`` and ``remove_queue_name_prefix`` to manage queue name prefixes. + +.. versionadded:: 2.6 ''' import logging diff --git a/ckan/lib/redis.py b/ckan/lib/redis.py index 3666ead9f8c..f0510afac31 100644 --- a/ckan/lib/redis.py +++ b/ckan/lib/redis.py @@ -2,6 +2,8 @@ u''' Redis utilities. + +.. versionadded:: 2.6 ''' from __future__ import absolute_import diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index b9294dc1556..d98bd054822 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -720,6 +720,8 @@ def job_clear(context, data_dict): :returns: The cleared queues. :rtype: list + + .. versionadded:: 2.6 ''' _check_access(u'job_clear', context, data_dict) queues = data_dict.get(u'queues') @@ -740,6 +742,8 @@ def job_cancel(context, data_dict): Removes the job from the queue and deletes it. :param string id: The ID of the background job. + + .. versionadded:: 2.6 ''' _check_access(u'job_cancel', context, data_dict) id = _get_or_bust(data_dict, u'id') diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index c9fdb4c0e08..7ca225d9a89 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3514,6 +3514,8 @@ def job_list(context, data_dict): :returns: The currently enqueued background jobs. :rtype: list + + .. versionadded:: 2.6 ''' _check_access(u'job_list', context, data_dict) dictized_jobs = [] @@ -3535,6 +3537,8 @@ def job_show(context, data_dict): :returns: Details about the background job. 
:rtype: dict + + .. versionadded:: 2.6 ''' _check_access(u'job_show', context, data_dict) id = _get_or_bust(data_dict, u'id') diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index c07ada7c8cc..1ea2d8c5a03 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -711,6 +711,8 @@ Default value: ``redis://localhost:6379/0`` URL to your Redis instance, including the database to be used. +.. versionadded:: 2.6 + CORS Settings ------------- diff --git a/doc/maintaining/paster.rst b/doc/maintaining/paster.rst index 748c9feaedd..25e1a46b287 100644 --- a/doc/maintaining/paster.rst +++ b/doc/maintaining/paster.rst @@ -265,6 +265,8 @@ jobs: Manage background jobs The ``jobs`` command can be used to manage :ref:`background jobs`. +.. versionadded:: 2.6 + .. _paster jobs worker: From 6379585216e47de0c3a7eaff7f0efc7408c75d89 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 22 Aug 2016 15:02:49 +0200 Subject: [PATCH 43/65] [#2977] Rename configuration option `redis_url` to `ckan.redis.url`. --- ckan/config/deployment.ini_tmpl | 2 +- ckan/config/environment.py | 2 +- ckan/lib/redis.py | 2 +- doc/contributing/test.rst | 4 ++-- doc/extensions/best-practices.rst | 2 +- doc/maintaining/configuration.rst | 8 ++++---- test-core.ini | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 68c328e5b93..ee59c86e329 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -82,7 +82,7 @@ ckan.site_id = default ## Redis Settings # URL to your Redis instance, including the database to be used. 
-#redis_url = redis://localhost:6379/0 +#ckan.redis.url = redis://localhost:6379/0 ## CORS Settings diff --git a/ckan/config/environment.py b/ckan/config/environment.py index cc19d1e9bd7..fb5f7d2cdc9 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -110,7 +110,7 @@ def find_controller(self, controller): 'sqlalchemy.url': 'CKAN_SQLALCHEMY_URL', 'ckan.datastore.write_url': 'CKAN_DATASTORE_WRITE_URL', 'ckan.datastore.read_url': 'CKAN_DATASTORE_READ_URL', - 'redis_url': 'CKAN_REDIS_URL', + 'ckan.redis.url': 'CKAN_REDIS_URL', 'solr_url': 'CKAN_SOLR_URL', 'ckan.site_id': 'CKAN_SITE_ID', 'ckan.site_url': 'CKAN_SITE_URL', diff --git a/ckan/lib/redis.py b/ckan/lib/redis.py index f0510afac31..d3c33098cbf 100644 --- a/ckan/lib/redis.py +++ b/ckan/lib/redis.py @@ -17,7 +17,7 @@ log = logging.getLogger(__name__) -REDIS_URL_SETTING_NAME = u'redis_url' +REDIS_URL_SETTING_NAME = u'ckan.redis.url' REDIS_URL_DEFAULT_VALUE = u'redis://localhost:6379/0' diff --git a/doc/contributing/test.rst b/doc/contributing/test.rst index d060936dd64..bcbf0adcac6 100644 --- a/doc/contributing/test.rst +++ b/doc/contributing/test.rst @@ -60,8 +60,8 @@ Create test databases: This database connection is specified in the ``test-core.ini`` file by the ``sqlalchemy.url`` parameter. -You should also make sure that the :ref:`Redis database ` configured -in ``test-core.ini`` is different from your production database. +You should also make sure that the :ref:`Redis database ` +configured in ``test-core.ini`` is different from your production database. 
~~~~~~~~~~~~~ Run the tests diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index b1b7e7d1f6a..67e341ea3ac 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -88,7 +88,7 @@ identifiers with the CKAN site ID, which is available via from pylons import config site_id = config[u'ckan.site_id'] -Currently this only affects the :ref:`Redis database `: +Currently this only affects the :ref:`Redis database `: * All *keys in the Redis database* created by your extension should be prefixed with both the CKAN site ID and your extension's name. diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 1ea2d8c5a03..616769ba9de 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -698,14 +698,14 @@ List of the extra resource fields that would be used when searching. Redis Settings --------------- -.. _redis_url: +.. _ckan_redis_url: -redis_url -^^^^^^^^^ +ckan.redis.url +^^^^^^^^^^^^^^ Example:: - redis_url = redis://localhost:7000/1 + ckan.redis.url = redis://localhost:7000/1 Default value: ``redis://localhost:6379/0`` diff --git a/test-core.ini b/test-core.ini index 8b63bf9e530..97426d8f17d 100644 --- a/test-core.ini +++ b/test-core.ini @@ -32,7 +32,7 @@ ckan.datapusher.formats = csv xls xlsx tsv application/csv application/vnd.ms-ex solr_url = http://127.0.0.1:8983/solr/ckan # Redis URL. Use a separate Redis database for testing. -redis_url = redis://localhost:6379/1 +ckan.redis.url = redis://localhost:6379/1 ckan.auth.user_create_organizations = true ckan.auth.user_create_groups = true From 4fcc8c1563844853769a96911df234bada8a6e6d Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 22 Aug 2016 15:09:05 +0200 Subject: [PATCH 44/65] [#2977] Remove `ckan.tests.helpers.temp_file` context manager. In most cases one can simply use `tempfile.NamedTemporaryFile` instead. 
--- ckan/tests/helpers.py | 25 ------------------------- ckan/tests/lib/test_cli.py | 7 ++++--- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index c0e7e2752f5..c1bb355abd7 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -27,7 +27,6 @@ import logging import os import re -import tempfile import webtest import nose.tools @@ -666,27 +665,3 @@ def clear(self): Clear all captured log messages. ''' self.messages = collections.defaultdict(list) - - -@contextlib.contextmanager -def temp_file(*args, **kwargs): - u''' - Context manager that provides a temporary file. - - The temporary file is named and open. It is automatically deleted - when the context manager is left if it still exists at that point. - - Any arguments are passed on to - :py:func:`tempfile.NamedTemporaryFile`. - ''' - kwargs['delete'] = False - f = tempfile.NamedTemporaryFile(*args, **kwargs) - try: - yield f - finally: - f.close() - try: - os.remove(f.name) - except OSError as e: - if e.errno != errno.ENOENT: - raise diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py index cd6e2e03b9c..7d360284082 100644 --- a/ckan/tests/lib/test_cli.py +++ b/ckan/tests/lib/test_cli.py @@ -6,6 +6,7 @@ import os.path from StringIO import StringIO import sys +import tempfile from nose.tools import (assert_raises, eq_ as eq, ok_ as ok, assert_in, assert_not_in, assert_not_equal as neq, assert_false as nok) @@ -320,7 +321,7 @@ def test_worker_default_queue(self): ''' Test ``jobs worker`` with the default queue. ''' - with helpers.temp_file() as f: + with tempfile.NamedTemporaryFile(delete=False) as f: self.enqueue(os.remove, args=[f.name]) paster(u'jobs', u'worker', u'--burst') all_jobs = self.all_jobs() @@ -331,8 +332,8 @@ def test_worker_specific_queues(self): ''' Test ``jobs worker`` with specific queues. 
''' - with helpers.temp_file() as f: - with helpers.temp_file() as g: + with tempfile.NamedTemporaryFile(delete=False) as f: + with tempfile.NamedTemporaryFile(delete=False) as g: job1 = self.enqueue() job2 = self.enqueue(queue=u'q2') self.enqueue(os.remove, args=[f.name], queue=u'q3') From 4bd5521812fab111dfa8708cb629e96f27abc260 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 22 Aug 2016 15:12:39 +0200 Subject: [PATCH 45/65] [#2977] Improve docs of `ckan.tests.helpers.RecordingLogHandler` Mention that it's usually best to simply use the `ckan.tests.recorded_logs` context manager. --- ckan/tests/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index c1bb355abd7..5f8e61dba0a 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -623,6 +623,9 @@ class RecordingLogHandler(logging.Handler): You can inspect the recorded messages via the ``messages`` attribute (a dict that maps log levels to lists of messages) or by using ``assert_log``. + + This class is rarely useful on its own, instead use + :py:func:`recorded_logs` to temporarily record log messages. ''' def __init__(self, *args, **kwargs): super(RecordingLogHandler, self).__init__(*args, **kwargs) From 74aabf9d7159bd74b0f933725f2ac11bb4da95f2 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 12 Sep 2016 13:53:31 +0200 Subject: [PATCH 46/65] [#2977] Add `ckan.plugins.toolkit.enqueue_job`. --- ckan/plugins/toolkit.py | 4 ++++ doc/extensions/best-practices.rst | 2 ++ doc/extensions/plugins-toolkit.rst | 2 ++ doc/maintaining/background-tasks.rst | 7 +++++++ 4 files changed, 15 insertions(+) diff --git a/ckan/plugins/toolkit.py b/ckan/plugins/toolkit.py index 899763806ea..ac475ed5811 100644 --- a/ckan/plugins/toolkit.py +++ b/ckan/plugins/toolkit.py @@ -96,6 +96,8 @@ class _Toolkit(object): 'auth_disallow_anonymous_access', # Helper not found error. 
'HelperError', + # Enqueue background job + 'enqueue_job', # Fully defined in this file ## 'add_template_directory', @@ -134,6 +136,7 @@ def _initialize(self): CkanVersionException, HelperError ) + from ckan.lib.jobs import enqueue as enqueue_job from paste.deploy import converters import pylons @@ -271,6 +274,7 @@ def _initialize(self): t['check_ckan_version'] = self._check_ckan_version t['CkanVersionException'] = CkanVersionException t['HelperError'] = HelperError + t['enqueue_job'] = enqueue_job # check contents list correct errors = set(t).symmetric_difference(set(self.contents)) diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index 67e341ea3ac..e3a8fecda2f 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -10,6 +10,8 @@ Follow CKAN's coding standards See :doc:`/contributing/index`. +.. _use the plugins toolkit: + ------------------------------------------------- Use the plugins toolkit instead of importing CKAN ------------------------------------------------- diff --git a/doc/extensions/plugins-toolkit.rst b/doc/extensions/plugins-toolkit.rst index fea2fc57421..905a11954df 100644 --- a/doc/extensions/plugins-toolkit.rst +++ b/doc/extensions/plugins-toolkit.rst @@ -1,3 +1,5 @@ +.. py:module:: ckan.plugins.toolkit + ------------------------- Plugins toolkit reference ------------------------- diff --git a/doc/maintaining/background-tasks.rst b/doc/maintaining/background-tasks.rst index 790782609e3..c5641dee57e 100644 --- a/doc/maintaining/background-tasks.rst +++ b/doc/maintaining/background-tasks.rst @@ -80,6 +80,13 @@ Once you have a job function, all you need to do is to use This will place a job on the :ref:`job queue ` where it can be picked up and executed by a worker. +.. note:: + + Extensions should use :py:func:`ckan.plugins.toolkit.enqueue_job` instead. 
+ It's the same function but accessing it via :py:mod:`ckan.plugins.toolkit` + :ref:`decouples your code from CKAN's internal structure `. + The first argument to ``enqueue`` is the job function to use. The second is a list of the arguments which should be passed to the function. You can omit it in which case no arguments will be passed. You can also pass keyword arguments From 4ef202b7423c8e64db4b0cf758fb7fc946255843 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 12 Sep 2016 14:28:35 +0200 Subject: [PATCH 47/65] [#2977] Improve wording of best practice regarding name clashes. The documentation previously talked about sharing databases which could be misunderstood as the possibility of sharing PostgreSQL databases between CKAN instances (instead of sharing Redis databases, which was the intended meaning). --- doc/extensions/best-practices.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index e3a8fecda2f..c291313fd81 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -80,10 +80,10 @@ the name of your extension. For example: begin with the name of your extension. For example ``my_extension:super-special-job-queue``. -In some situations, resources like databases may even be shared between -multiple CKAN *instances*, which requires an even higher degree of uniqueness -for the corresponding names. In that case, you should also prefix your -identifiers with the CKAN site ID, which is available via +In some situations, a resource may even be shared between multiple CKAN +*instances*, which requires an even higher degree of uniqueness for the +corresponding names. 
In that case, you should also prefix your identifiers with +the CKAN site ID, which is available via :: From 18e140b5c07e952630747dbd1789d0b089b5e5f4 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 12 Sep 2016 14:34:39 +0200 Subject: [PATCH 48/65] [#2977] Don't remove ckan/config/celery-supervisor.conf after all. A previous commit for #2977 removed `ckan/config/celery-supervisor.conf` as part of deprecating the old Celery background task system. However, the old documentation told people to copy *or link* that file, so removing it could break existing installations. Hence this commit restores the file, it should be kept around until support for the Celery system is removed completely. --- ckan/config/celery-supervisor.conf | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 ckan/config/celery-supervisor.conf diff --git a/ckan/config/celery-supervisor.conf b/ckan/config/celery-supervisor.conf new file mode 100644 index 00000000000..a70ed812803 --- /dev/null +++ b/ckan/config/celery-supervisor.conf @@ -0,0 +1,31 @@ +; =============================== +; ckan celeryd supervisor example +; =============================== + +; symlink or copy this file to /etc/supervisr/conf.d +; change the path/to/virtualenv below to the virtualenv ckan is in. + + +[program:celery] +; Full Path to executable, should be path to virtural environment, +; Full path to config file too. + +command=/path/to/pyenv/bin/paster --plugin=ckan celeryd --config=/path/to/config/testing.ini + +; user that owns virtual environment. +user=ckan + +numprocs=1 +stdout_logfile=/var/log/celeryd.log +stderr_logfile=/var/log/celeryd.log +autostart=true +autorestart=true +startsecs=10 + +; Need to wait for currently executing tasks to finish at shutdown. +; Increase this if you have very long running tasks. 
+stopwaitsecs = 600 + +; if rabbitmq is supervised, set its priority higher +; so it starts first +priority=998 From e855b55ad7b69a72057ea4b1c9d52cbe9a40d73c Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Mon, 12 Sep 2016 17:49:07 +0200 Subject: [PATCH 49/65] [#2977] Remove CKAN prefix from suggested configuration option name. The docs now suggest to use `my_extension.my_setting` instead of the previously suggested `ckan.my_extension.my_setting`. --- doc/extensions/best-practices.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index c291313fd81..d3ae992f04e 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -50,7 +50,7 @@ conflicts you should prefix any public name that your extension introduces with the name of your extension. For example: * The names of *configuration settings* introduced by your extension should - have the form ``ckan.my_extension.my_config_setting``. + have the form ``my_extension.my_config_setting``. 
* The names of *templates and template snippets* introduced by your extension should begin with the name of your extension:: From b9b1bd66c0deb0288c752af28eb4e164f3adb656 Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Thu, 8 Sep 2016 14:24:19 -0700 Subject: [PATCH 50/65] Issue #147 - Activity Time stored in UTC bcgov/ckanext-bcgov#147 ckan/ckan#2882 ckan/ckan#2970 The Activity model now stores its timestamp in utc In Formatters, localized_nice_date, removed the datetime replace method calls with actually ensuring the comparing timestamps have a timezone specified fixing typo --- ckan/lib/formatters.py | 10 ++++------ ckan/model/activity.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ckan/lib/formatters.py b/ckan/lib/formatters.py index f731144c606..ccf9f437439 100644 --- a/ckan/lib/formatters.py +++ b/ckan/lib/formatters.py @@ -99,12 +99,10 @@ def months_between(date1, date2): return months if not show_date: - now = datetime.datetime.utcnow() - if datetime_.tzinfo is not None: - now = now.replace(tzinfo=datetime_.tzinfo) - else: - now = now.replace(tzinfo=pytz.utc) - datetime_ = datetime_.replace(tzinfo=pytz.utc) + now = datetime.datetime.now(pytz.utc) + if datetime_.tzinfo is None: + datetime_ = datetime_.astimezone(pytz.utc) + date_diff = now - datetime_ days = date_diff.days if days < 1 and now > datetime_: diff --git a/ckan/model/activity.py b/ckan/model/activity.py index cfd6d02e2a5..fdfa52660cc 100644 --- a/ckan/model/activity.py +++ b/ckan/model/activity.py @@ -40,7 +40,7 @@ class Activity(domain_object.DomainObject): def __init__(self, user_id, object_id, revision_id, activity_type, data=None): self.id = _types.make_uuid() - self.timestamp = datetime.datetime.now() + self.timestamp = datetime.datetime.utcnow() self.user_id = user_id self.object_id = object_id self.revision_id = revision_id From 9733d90b0f00b3ae999672990130ecf29e488598 Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Mon, 12 Sep 2016 15:41:52 -0700 Subject: [PATCH 
51/65] Adding migration script to adjust current activity timestamps to utc --- .../085_adjust_activity_timestamps.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 ckan/migration/versions/085_adjust_activity_timestamps.py diff --git a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py new file mode 100644 index 00000000000..2c95ace6b5e --- /dev/null +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -0,0 +1,33 @@ +# encoding: utf-8 + +import pytz +import tzlocal +import logging + +log = logging.getLogger(__name__) + +def upgrade(migrate_engine): +""" +The script assumes that the current timestamp was recorded with the server's current set timezone +""" + local_tz = tzlocal.get_localzone() + + with migrate_engine.begin() as connection: + sql = "select id, timestamp from activity" + results = connection.execute(sql) + + log.info("Adjusting Activity timestamp, server's localtime zone: {tz}".format(tz=local_tz)) + + for row in results: + id, timestamp = row + + timestamp = timestamp.replace(tzinfo=local_tz) + to_utc = timestamp.astimezone(pytz.utc) + to_utc = to_utc.replace(tzinfo=None) + + update_sql = "update activity set timestamp = %s where id = %s" + + connection.execute(update_sql, to_utc, id) + + log.info("""Adjusting Activity timestamp to UTC for {id}: {old_ts} --> {new_ts} + """.format(id=id, old_ts=timestamp, new_ts=to_utc)) From c3cf34e9b852f7c0198bd2ea1cacf6a5b13ebb34 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Tue, 13 Sep 2016 14:28:54 +0200 Subject: [PATCH 52/65] [#2977] Change CKAN release from 2.6 to 2.7. 
--- ckan/lib/jobs.py | 2 +- ckan/lib/redis.py | 2 +- ckan/logic/action/delete.py | 4 ++-- ckan/logic/action/get.py | 4 ++-- doc/maintaining/background-tasks.rst | 6 +++--- doc/maintaining/configuration.rst | 2 +- doc/maintaining/paster.rst | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index 3223f948b0e..cd06bde46af 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -15,7 +15,7 @@ prefixed names. Use the functions ``add_queue_name_prefix`` and ``remove_queue_name_prefix`` to manage queue name prefixes. -.. versionadded:: 2.6 +.. versionadded:: 2.7 ''' import logging diff --git a/ckan/lib/redis.py b/ckan/lib/redis.py index d3c33098cbf..77779bbcb4f 100644 --- a/ckan/lib/redis.py +++ b/ckan/lib/redis.py @@ -3,7 +3,7 @@ u''' Redis utilities. -.. versionadded:: 2.6 +.. versionadded:: 2.7 ''' from __future__ import absolute_import diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index d98bd054822..e4529d5ff0a 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -721,7 +721,7 @@ def job_clear(context, data_dict): :returns: The cleared queues. :rtype: list - .. versionadded:: 2.6 + .. versionadded:: 2.7 ''' _check_access(u'job_clear', context, data_dict) queues = data_dict.get(u'queues') @@ -743,7 +743,7 @@ def job_cancel(context, data_dict): :param string id: The ID of the background job. - .. versionadded:: 2.6 + .. versionadded:: 2.7 ''' _check_access(u'job_cancel', context, data_dict) id = _get_or_bust(data_dict, u'id') diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 7ca225d9a89..39c7f40cd62 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -3515,7 +3515,7 @@ def job_list(context, data_dict): :returns: The currently enqueued background jobs. :rtype: list - .. versionadded:: 2.6 + .. 
versionadded:: 2.7 ''' _check_access(u'job_list', context, data_dict) dictized_jobs = [] @@ -3538,7 +3538,7 @@ def job_show(context, data_dict): :returns: Details about the background job. :rtype: dict - .. versionadded:: 2.6 + .. versionadded:: 2.7 ''' _check_access(u'job_show', context, data_dict) id = _get_or_bust(data_dict, u'id') diff --git a/doc/maintaining/background-tasks.rst b/doc/maintaining/background-tasks.rst index c5641dee57e..8c68c380dca 100644 --- a/doc/maintaining/background-tasks.rst +++ b/doc/maintaining/background-tasks.rst @@ -22,7 +22,7 @@ application is waiting is a good candidate for a background job. .. note:: The current background job system is based on RQ_ and was introduced in - CKAN 2.6. See :ref:`background jobs migration` for details on how to + CKAN 2.7. See :ref:`background jobs migration` for details on how to migrate your jobs from the previous system introduced in CKAN 1.5. .. _RQ: http://python-rq.org @@ -260,7 +260,7 @@ to use non-standard queues. Migrating from CKAN's previous background job system ==================================================== -Before version 2.6 (starting from 1.5), CKAN offered a different background job +Before version 2.7 (starting from 1.5), CKAN offered a different background job system built around Celery_. That system is still available but deprecated and will be removed in future versions of CKAN. You should therefore update your code to use the new system described above. @@ -304,7 +304,7 @@ job ID, that will be done automatically for you. Supporting both systems at once ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Not all CKAN installations will immediately update to CKAN 2.6. It might +Not all CKAN installations will immediately update to CKAN 2.7. It might therefore make sense for you to support both the new and the old job system. That way you are ready when the old system is removed but can continue to support older CKAN installations. 
diff --git a/doc/maintaining/configuration.rst b/doc/maintaining/configuration.rst index 616769ba9de..35596ba442c 100644 --- a/doc/maintaining/configuration.rst +++ b/doc/maintaining/configuration.rst @@ -711,7 +711,7 @@ Default value: ``redis://localhost:6379/0`` URL to your Redis instance, including the database to be used. -.. versionadded:: 2.6 +.. versionadded:: 2.7 CORS Settings diff --git a/doc/maintaining/paster.rst b/doc/maintaining/paster.rst index 25e1a46b287..88ac3b769e9 100644 --- a/doc/maintaining/paster.rst +++ b/doc/maintaining/paster.rst @@ -265,7 +265,7 @@ jobs: Manage background jobs The ``jobs`` command can be used to manage :ref:`background jobs`. -.. versionadded:: 2.6 +.. versionadded:: 2.7 .. _paster jobs worker: From 9c04a786604a286d19cda89caef07837941db68d Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Tue, 13 Sep 2016 10:01:37 -0700 Subject: [PATCH 53/65] Using only sql statement, removed python and logging --- .../085_adjust_activity_timestamps.py | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py index 2c95ace6b5e..d822a659677 100644 --- a/ckan/migration/versions/085_adjust_activity_timestamps.py +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -1,33 +1,14 @@ # encoding: utf-8 -import pytz -import tzlocal -import logging - -log = logging.getLogger(__name__) +import datetime def upgrade(migrate_engine): -""" -The script assumes that the current timestamp was recorded with the server's current set timezone -""" - local_tz = tzlocal.get_localzone() + """ + The script assumes that the current timestamp was recorded with the server's current set timezone + """ + utc_now = datetime.datetime.utcnow() + local_now = datetime.datetime.now() with migrate_engine.begin() as connection: - sql = "select id, timestamp from activity" - results = connection.execute(sql) - - 
log.info("Adjusting Activity timestamp, server's localtime zone: {tz}".format(tz=local_tz)) - - for row in results: - id, timestamp = row - - timestamp = timestamp.replace(tzinfo=local_tz) - to_utc = timestamp.astimezone(pytz.utc) - to_utc = to_utc.replace(tzinfo=None) - - update_sql = "update activity set timestamp = %s where id = %s" - - connection.execute(update_sql, to_utc, id) - - log.info("""Adjusting Activity timestamp to UTC for {id}: {old_ts} --> {new_ts} - """.format(id=id, old_ts=timestamp, new_ts=to_utc)) + sql = "update activity set timestamp = timestamp + (%s - %s);" + connection.execute(sql, utc_now, local_now) From b6d51cb1712d3ea3886df22d43b9a35b666170ea Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Tue, 13 Sep 2016 10:30:41 -0700 Subject: [PATCH 54/65] Fixed pep8 issues --- ckan/migration/versions/085_adjust_activity_timestamps.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py index d822a659677..6cfc1d4a295 100644 --- a/ckan/migration/versions/085_adjust_activity_timestamps.py +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -2,9 +2,11 @@ import datetime + def upgrade(migrate_engine): """ - The script assumes that the current timestamp was recorded with the server's current set timezone + The script assumes that the current timestamp was + recorded with the server's current set timezone """ utc_now = datetime.datetime.utcnow() local_now = datetime.datetime.now() From 596f5fdb1696fbc4e1714db7bd574a702b9d4998 Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Tue, 13 Sep 2016 10:53:10 -0700 Subject: [PATCH 55/65] Need to use replace for naive timestamps --- ckan/lib/formatters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/formatters.py b/ckan/lib/formatters.py index ccf9f437439..6c927a53399 100644 --- a/ckan/lib/formatters.py +++ b/ckan/lib/formatters.py @@ 
-101,7 +101,7 @@ def months_between(date1, date2): if not show_date: now = datetime.datetime.now(pytz.utc) if datetime_.tzinfo is None: - datetime_ = datetime_.astimezone(pytz.utc) + datetime_ = datetime_.replace(tzinfo=pytz.utc) date_diff = now - datetime_ days = date_diff.days From 29d88a8d7ae607bfda552164fee1035729c40fec Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Tue, 13 Sep 2016 10:56:36 -0700 Subject: [PATCH 56/65] Fixing another pep8 issue --- ckan/migration/versions/085_adjust_activity_timestamps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py index 6cfc1d4a295..34c5eb24a11 100644 --- a/ckan/migration/versions/085_adjust_activity_timestamps.py +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -5,7 +5,7 @@ def upgrade(migrate_engine): """ - The script assumes that the current timestamp was + The script assumes that the current timestamp was recorded with the server's current set timezone """ utc_now = datetime.datetime.utcnow() From a967a9b990954d78b156a8161219aed3f4e81631 Mon Sep 17 00:00:00 2001 From: Florian Brucker Date: Wed, 14 Sep 2016 09:21:50 +0200 Subject: [PATCH 57/65] [#2977] Import `config` from `ckan.common` instead of `pylons`. 
--- ckan/lib/jobs.py | 2 +- ckan/lib/redis.py | 3 ++- ckan/tests/lib/test_cli.py | 2 +- ckan/tests/lib/test_jobs.py | 2 +- doc/extensions/best-practices.rst | 8 +++++++- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/ckan/lib/jobs.py b/ckan/lib/jobs.py index cd06bde46af..2f90ec57c77 100644 --- a/ckan/lib/jobs.py +++ b/ckan/lib/jobs.py @@ -20,7 +20,6 @@ import logging -from pylons import config import rq from rq.connections import push_connection from rq.exceptions import NoSuchJobError @@ -28,6 +27,7 @@ from rq.utils import ensure_list from ckan.lib.redis import connect_to_redis +from ckan.common import config log = logging.getLogger(__name__) diff --git a/ckan/lib/redis.py b/ckan/lib/redis.py index 77779bbcb4f..0e1c5f80d9b 100644 --- a/ckan/lib/redis.py +++ b/ckan/lib/redis.py @@ -11,9 +11,10 @@ import datetime import logging -from pylons import config from redis import ConnectionPool, Redis +from ckan.common import config + log = logging.getLogger(__name__) diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py index 7d360284082..e94664687d4 100644 --- a/ckan/tests/lib/test_cli.py +++ b/ckan/tests/lib/test_cli.py @@ -10,12 +10,12 @@ from nose.tools import (assert_raises, eq_ as eq, ok_ as ok, assert_in, assert_not_in, assert_not_equal as neq, assert_false as nok) -from pylons import config from paste.script.command import run import ckan.lib.cli as cli import ckan.lib.jobs as jobs import ckan.tests.helpers as helpers +from ckan.common import config log = logging.getLogger(__name__) diff --git a/ckan/tests/lib/test_jobs.py b/ckan/tests/lib/test_jobs.py index ed842ffa4e7..b69e7ab3bd2 100644 --- a/ckan/tests/lib/test_jobs.py +++ b/ckan/tests/lib/test_jobs.py @@ -7,11 +7,11 @@ import datetime from nose.tools import ok_, assert_equal, raises -from pylons import config import rq import ckan.lib.jobs as jobs from ckan.tests.helpers import changed_config, recorded_logs, RQTestBase +from ckan.common import config class 
TestQueueNamePrefixes(RQTestBase): diff --git a/doc/extensions/best-practices.rst b/doc/extensions/best-practices.rst index d3ae992f04e..dcc27ff5608 100644 --- a/doc/extensions/best-practices.rst +++ b/doc/extensions/best-practices.rst @@ -87,7 +87,13 @@ the CKAN site ID, which is available via :: - from pylons import config + try: + # CKAN 2.7 and later + from ckan.common import config + except ImportError: + # CKAN 2.6 and earlier + from pylons import config + site_id = config[u'ckan.site_id'] Currently this only affects the :ref:`Redis database `: From f2fbc4b084f438c5d99d36830b6d55a1af317c94 Mon Sep 17 00:00:00 2001 From: Jared Smith Date: Wed, 14 Sep 2016 09:11:32 -0700 Subject: [PATCH 58/65] Prefixing strings for pep8 --- ckan/migration/versions/085_adjust_activity_timestamps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py index 34c5eb24a11..94491fe509c 100644 --- a/ckan/migration/versions/085_adjust_activity_timestamps.py +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -4,7 +4,7 @@ def upgrade(migrate_engine): - """ + u""" The script assumes that the current timestamp was recorded with the server's current set timezone """ @@ -12,5 +12,5 @@ def upgrade(migrate_engine): local_now = datetime.datetime.now() with migrate_engine.begin() as connection: - sql = "update activity set timestamp = timestamp + (%s - %s);" + sql = u"update activity set timestamp = timestamp + (%s - %s);" connection.execute(sql, utc_now, local_now) From a7d266fd8d91166234d0852edba6bad705960261 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Thu, 15 Sep 2016 12:27:59 -0400 Subject: [PATCH 59/65] [#2882] ignore current daylight setting, skip when not necessary --- .../versions/085_adjust_activity_timestamps.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git 
a/ckan/migration/versions/085_adjust_activity_timestamps.py b/ckan/migration/versions/085_adjust_activity_timestamps.py index 94491fe509c..ddcea2ece31 100644 --- a/ckan/migration/versions/085_adjust_activity_timestamps.py +++ b/ckan/migration/versions/085_adjust_activity_timestamps.py @@ -8,9 +8,16 @@ def upgrade(migrate_engine): The script assumes that the current timestamp was recorded with the server's current set timezone """ - utc_now = datetime.datetime.utcnow() - local_now = datetime.datetime.now() + # choose a fixed date (within DST) so migration depends only on + # server time zone not the current daylight savings state + magic_timestamp = datetime.datetime(2016, 6, 20).toordinal() + + utc_date = datetime.datetime.utcfromtimestamp(magic_timestamp) + local_date = datetime.datetime.fromtimestamp(magic_timestamp) + + if utc_date == local_date: + return with migrate_engine.begin() as connection: sql = u"update activity set timestamp = timestamp + (%s - %s);" - connection.execute(sql, utc_now, local_now) + connection.execute(sql, utc_date, local_date) From b7e2971c3549a6df831f5e761ce9831030d74363 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 16 Sep 2016 09:33:48 -0400 Subject: [PATCH 60/65] [#3194] Setting HTTP_HOST shouldn't be required --- ckan/tests/helpers.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index 0c3910b1a75..bfda83a1a1e 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -19,7 +19,6 @@ This module is reserved for these very useful functions. ''' -import urlparse import webtest import nose.tools from nose.tools import assert_in, assert_not_in @@ -214,12 +213,6 @@ def submit_and_follow(app, form, extra_environ=None, name=None, Call webtest_submit with name/value passed expecting a redirect and return the response from following that redirect. 
''' - if extra_environ is None: - extra_environ = {} - if not extra_environ.get('HTTP_HOST'): - extra_environ['HTTP_HOST'] = str( - urlparse.urlparse(config['ckan.site_url']).netloc) - response = webtest_submit(form, name, value=value, status=302, extra_environ=extra_environ, **args) return app.get(url=response.headers['Location'], From 6f3b1dc1ab037c6163350623f002b7dc12f8e605 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 16 Sep 2016 10:52:11 -0400 Subject: [PATCH 61/65] [#3192] doc: add IPermissionLabels to authorization docs --- doc/maintaining/authorization.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/maintaining/authorization.rst b/doc/maintaining/authorization.rst index acf9adf0265..342eec63b0c 100644 --- a/doc/maintaining/authorization.rst +++ b/doc/maintaining/authorization.rst @@ -86,5 +86,14 @@ Extensions CKAN extensions can implement custom authorization rules by overriding the authorization functions that CKAN uses. This is done by implementing the -:py:class:`~ckan.plugins.interfaces.IAuthFunctions` plugin interface. To get -started with writing CKAN extensions, see :doc:`/extensions/index`. +:py:class:`~ckan.plugins.interfaces.IAuthFunctions` plugin interface. + +Dataset visibility is determined by permission labels stored in the +search index. +Implement the :py:class:`~ckan.plugins.interfaces.IPermissionLabels` +plugin interface then :ref:`rebuild your search index ` +to change your dataset visibility rules. There is +no need to override the ``package_show`` auth function, it will inherit +these changes automatically. + +To get started with writing CKAN extensions, see :doc:`/extensions/index`.
From 91f34b3ce1fe0b340df114be2fe6c388d95108f7 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 16 Sep 2016 12:55:48 -0400 Subject: [PATCH 62/65] [#3192] simplify test org set-up --- .../tests/test_example_ipermissionlabels.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index de96d08321e..e805906a20f 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -7,7 +7,7 @@ import ckan.plugins from ckan.plugins.toolkit import get_action, NotAuthorized -from ckan.tests.helpers import FunctionalTestBase, call_action, call_auth +from ckan.tests.helpers import FunctionalTestBase, call_auth from ckan.tests import factories from ckan import model @@ -28,10 +28,9 @@ def test_normal_dataset_permissions_are_normal(self): user2 = factories.User() user3 = factories.User() org = factories.Organization(user=user) - org2 = factories.Organization(user=user2) - call_action( - u'organization_member_create', None, username=user3['name'], - id=org2['id'], role=u'member') + org2 = factories.Organization( + user=user2, + users=[{'name': user3['id'], 'capacity': 'member'}]) dataset = factories.Dataset( name=u'd1', user=user, private=True, owner_org=org['id']) @@ -79,10 +78,9 @@ def test_proposed_dataset_visible_to_creator(self): def test_proposed_dataset_visible_to_org_admin(self): user = factories.User() user2 = factories.User() - org = factories.Organization(user=user2) - call_action( - u'organization_member_create', None, username=user['name'], - id=org['id'], role=u'editor') + org = factories.Organization( + user=user2, + users=[{'name': user['id'], 'capacity': 'editor'}]) dataset = factories.Dataset( name=u'd1', notes=u'Proposed:', user=user, owner_org=org['id']) @@ -98,10 +96,9 @@ 
def test_proposed_dataset_visible_to_org_admin(self): def test_proposed_dataset_invisible_to_another_editor(self): user = factories.User() user2 = factories.User() - org = factories.Organization(user=user2) - call_action( - u'organization_member_create', None, username=user['name'], - id=org['id'], role=u'editor') + org = factories.Organization( + user=user2, + users=[{'name': user['id'], 'capacity': 'editor'}]) dataset = factories.Dataset( name=u'd1', notes=u'Proposed:', user=user2, owner_org=org['id']) From 81f02f2cdaca4f06223e4f86475955610a587b16 Mon Sep 17 00:00:00 2001 From: Ian Ward Date: Fri, 16 Sep 2016 13:09:44 -0400 Subject: [PATCH 63/65] [#3192] u'literals' --- .../tests/test_example_ipermissionlabels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py index e805906a20f..77a1572dedb 100644 --- a/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py +++ b/ckanext/example_ipermissionlabels/tests/test_example_ipermissionlabels.py @@ -30,7 +30,7 @@ def test_normal_dataset_permissions_are_normal(self): org = factories.Organization(user=user) org2 = factories.Organization( user=user2, - users=[{'name': user3['id'], 'capacity': 'member'}]) + users=[{u'name': user3['id'], u'capacity': u'member'}]) dataset = factories.Dataset( name=u'd1', user=user, private=True, owner_org=org['id']) @@ -80,7 +80,7 @@ def test_proposed_dataset_visible_to_org_admin(self): user2 = factories.User() org = factories.Organization( user=user2, - users=[{'name': user['id'], 'capacity': 'editor'}]) + users=[{u'name': user['id'], u'capacity': u'editor'}]) dataset = factories.Dataset( name=u'd1', notes=u'Proposed:', user=user, owner_org=org['id']) @@ -98,7 +98,7 @@ def test_proposed_dataset_invisible_to_another_editor(self): user2 = factories.User() org = factories.Organization( user=user2, - 
users=[{'name': user['id'], 'capacity': 'editor'}]) + users=[{u'name': user['id'], u'capacity': u'editor'}]) dataset = factories.Dataset( name=u'd1', notes=u'Proposed:', user=user2, owner_org=org['id']) From fabcc7f2b73a552ed7c44ed9c75182ae4b2c7dbc Mon Sep 17 00:00:00 2001 From: Olaf Veerman Date: Fri, 16 Sep 2016 17:13:38 -0400 Subject: [PATCH 64/65] [#2364] Add resource format for GeoJSON As listed on: https://tools.ietf.org/html/rfc7946 --- ckan/config/resource_formats.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/config/resource_formats.json b/ckan/config/resource_formats.json index fad65202015..d4749a99b1f 100644 --- a/ckan/config/resource_formats.json +++ b/ckan/config/resource_formats.json @@ -69,7 +69,7 @@ ["TAR", "TAR Compressed File", "application/x-tar", []], ["PNG", "PNG Image File", "image/png", []], ["RSS", "RSS feed", "application/rss+xml", []], - ["GeoJSON", "Geographic JavaScript Object Notation", null, []], + ["GeoJSON", "Geographic JavaScript Object Notation", "application/geo+json", ["geojson"]], ["SHP", "Shapefile", null, ["esri shapefile"]], ["TORRENT", "Torrent", "application/x-bittorrent", ["bittorrent"]], ["ICS", "iCalendar", "text/calendar", ["ifb", "iCal"]], From 076a4498c6ce96adf677dd9fd1e34b3ca5f9fe8d Mon Sep 17 00:00:00 2001 From: Ralf Gommers Date: Tue, 20 Sep 2016 20:04:44 +1200 Subject: [PATCH 65/65] Fix html rendering of apache and nginx config file paths. Sphinx doesn't allow substitutions in verbatim (double backticks) blocks. This caused the substitutions to not be expanded. See http://docs.ckan.org/en/latest/maintaining/installing/deployment.html at the start of sections 6 and 8. 
--- doc/maintaining/installing/deployment.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/maintaining/installing/deployment.rst b/doc/maintaining/installing/deployment.rst index 3f452072339..8f7bd183031 100644 --- a/doc/maintaining/installing/deployment.rst +++ b/doc/maintaining/installing/deployment.rst @@ -112,7 +112,7 @@ CKAN to run in). 6. Create the Apache config file -------------------------------- -Create your site's Apache config file at ``|apache_config_file|``, with the +Create your site's Apache config file at |apache_config_file|, with the following contents: .. parsed-literal:: @@ -203,7 +203,7 @@ Open ``/etc/apache2/ports.conf``. We need to replace the default port 80 with th 8. Create the Nginx config file ------------------------------- -Create your site's Nginx config file at ``|nginx_config_file|``, with the +Create your site's Nginx config file at |nginx_config_file|, with the following contents: .. parsed-literal::