diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 738ff22858d..97c4f9412ba 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -133,6 +133,12 @@ ckan.feeds.author_link = #ofs.aws_access_key_id = .... #ofs.aws_secret_access_key = .... +## Datapusher settings + +# Make sure you have set up the DataStore + +datapusher.formats = csv +datapusher.url = http://datapusher.ckan.org/ ## Activity Streams Settings diff --git a/ckan/config/middleware.py b/ckan/config/middleware.py index bdf1ff016a0..4df0aa84cb0 100644 --- a/ckan/config/middleware.py +++ b/ckan/config/middleware.py @@ -338,8 +338,8 @@ def __call__(self, environ, start_response): key = ''.join([ environ['HTTP_USER_AGENT'], environ['REMOTE_ADDR'], - environ['HTTP_ACCEPT_LANGUAGE'], - environ['HTTP_ACCEPT_ENCODING'], + environ.get('HTTP_ACCEPT_LANGUAGE', ''), + environ.get('HTTP_ACCEPT_ENCODING', ''), ]) key = hashlib.md5(key).hexdigest() # store key/data here diff --git a/ckan/config/solr/schema-2.0.xml b/ckan/config/solr/schema-2.0.xml index 7e681738c5b..26b8f0dedd6 100644 --- a/ckan/config/solr/schema-2.0.xml +++ b/ckan/config/solr/schema-2.0.xml @@ -129,6 +129,7 @@ + diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index caad1e88359..793f0733c95 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -300,10 +300,6 @@ def package_dictize(pkg, context): result_dict['metadata_created'] = pkg.metadata_created.isoformat() \ if pkg.metadata_created else None - if context.get('for_view'): - for item in plugins.PluginImplementations( plugins.IPackageController): - result_dict = item.before_view(result_dict) - return result_dict def _get_members(context, group, member_type): diff --git a/ckan/lib/formatters.py b/ckan/lib/formatters.py index 193580aa28f..dde93325f4e 100644 --- a/ckan/lib/formatters.py +++ b/ckan/lib/formatters.py @@ -80,6 +80,22 @@ def 
localised_nice_date(datetime_, show_date=False, with_hours=False): :rtype: sting ''' + + def months_between(date1, date2): + if date1 > date2: + date1, date2 = date2, date1 + m1 = date1.year * 12 + date1.month + m2 = date2.year * 12 + date2.month + months = m2 - m1 + if date1.day > date2.day: + months -= 1 + elif date1.day == date2.day: + seconds1 = date1.hour * 3600 + date1.minute + date1.second + seconds2 = date2.hour * 3600 + date2.minute + date2.second + if seconds1 > seconds2: + months -= 1 + return months + if not show_date: now = datetime.datetime.now() date_diff = now - datetime_ @@ -98,9 +114,16 @@ def localised_nice_date(datetime_, show_date=False, with_hours=False): return ungettext('{hours} hour ago', '{hours} hours ago', seconds / 3600).format(hours=seconds / 3600) # more than one day - if days < 31: + months = months_between(datetime_, now) + + if months < 1: return ungettext('{days} day ago', '{days} days ago', days).format(days=days) + if months < 13: + return ungettext('{months} month ago', '{months} months ago', + months).format(months=months) + return ungettext('over {years} year ago', 'over {years} years ago', + months / 12).format(years=months / 12) # actual date details = { 'min': datetime_.minute, diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index 250fbf67e73..3403ce7e58c 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -48,6 +48,28 @@ log = logging.getLogger(__name__) +def _datestamp_to_datetime(datetime_): + ''' Converts a datestamp to a datetime. If a datetime is provided it + just gets returned. 
+ + :param datetime_: the timestamp + :type datetime_: string or datetime + + :rtype: datetime + ''' + if isinstance(datetime_, basestring): + try: + datetime_ = date_str_to_datetime(datetime_) + except TypeError: + return None + except ValueError: + return None + # check we are now a datetime + if not isinstance(datetime_, datetime.datetime): + return None + return datetime_ + + def redirect_to(*args, **kw): '''A routes.redirect_to wrapper to retain the i18n settings''' kw['__ckan_no_root'] = True @@ -793,15 +815,8 @@ def render_datetime(datetime_, date_format=None, with_hours=False): :rtype: string ''' - if isinstance(datetime_, basestring): - try: - datetime_ = date_str_to_datetime(datetime_) - except TypeError: - return '' - except ValueError: - return '' - # check we are now a datetime - if not isinstance(datetime_, datetime.datetime): + datetime_ = _datestamp_to_datetime(datetime_) + if not datetime_: return '' # if date_format was supplied we use it if date_format: @@ -910,8 +925,11 @@ def dst(self, dt): def tzname(self, dt): return None - +@maintain.deprecated('h.time_ago_in_words_from_str is deprecated in 2.2 ' + 'and will be removed. 
Please use ' + 'h.time_ago_from_timestamp instead') def time_ago_in_words_from_str(date_str, granularity='month'): + '''Deprecated in 2.2 use time_ago_from_timestamp''' if date_str: return date.time_ago_in_words(date_str_to_datetime(date_str), granularity=granularity) @@ -919,6 +937,21 @@ def time_ago_in_words_from_str(date_str, granularity='month'): return _('Unknown') +def time_ago_from_timestamp(timestamp): + ''' Returns a string like `5 months ago` for a datetime relative to now + :param timestamp: the timestamp or datetime + :type timestamp: string or datetime + + :rtype: string + ''' + datetime_ = _datestamp_to_datetime(timestamp) + if not datetime_: + return _('Unknown') + + # the localised date + return formatters.localised_nice_date(datetime_, show_date=False) + + def button_attr(enable, type='primary'): if enable: return 'class="btn %s"' % type @@ -1681,6 +1714,7 @@ def new_activities(): 'localised_filesize', 'list_dict_filter', 'new_activities', + 'time_ago_from_timestamp', # imported into ckan.lib.helpers 'literal', 'link_to', diff --git a/ckan/lib/navl/dictization_functions.py b/ckan/lib/navl/dictization_functions.py index 5eadebdc30b..e05ad851287 100644 --- a/ckan/lib/navl/dictization_functions.py +++ b/ckan/lib/navl/dictization_functions.py @@ -1,6 +1,7 @@ import copy import formencode as fe import inspect +import json from pylons import config from ckan.common import _ @@ -402,3 +403,11 @@ def unflatten(data): unflattened[key] = [unflattened[key][s] for s in sorted(unflattened[key])] return unflattened + + +class MissingNullEncoder(json.JSONEncoder): + '''json encoder that treats missing objects as null''' + def default(self, obj): + if isinstance(obj, Missing): + return None + return json.JSONEncoder.default(self, obj) diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index d7afc157e3a..1d6762d68a4 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -123,7 +123,8 @@ def notify(self, entity, operation): 
dispatch_by_operation( entity.__class__.__name__, logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + {'model': model, 'ignore_auth': True, 'validate': False, + 'use_cache': False}, {'id': entity.id}), operation ) @@ -147,18 +148,18 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def log.info("Rebuilding search index...") package_index = index_for(model.Package) + context = {'model': model, 'ignore_auth': True, 'validate': False, + 'use_cache': False} if package_id: - pkg_dict = logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + pkg_dict = logic.get_action('package_show')(context, {'id': package_id}) log.info('Indexing just package %r...', pkg_dict['name']) package_index.remove_dict(pkg_dict) package_index.insert_dict(pkg_dict) elif package_ids: for package_id in package_ids: - pkg_dict = logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, 'validate': False}, + pkg_dict = logic.get_action('package_show')(context, {'id': package_id}) log.info('Indexing just package %r...', pkg_dict['name']) package_index.update_dict(pkg_dict, True) @@ -185,9 +186,7 @@ def rebuild(package_id=None, only_missing=False, force=False, refresh=False, def for pkg_id in package_ids: try: package_index.update_dict( - logic.get_action('package_show')( - {'model': model, 'ignore_auth': True, - 'validate': False}, + logic.get_action('package_show')(context, {'id': pkg_id} ), defer_commit diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index 21b301e6f6d..1f6d8bed8ae 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -18,9 +18,13 @@ from ckan.plugins import (PluginImplementations, IPackageController) import ckan.logic as logic +import ckan.lib.plugins as lib_plugins +import ckan.lib.navl.dictization_functions log = logging.getLogger(__name__) +_validate = ckan.lib.navl.dictization_functions.validate + TYPE_FIELD = 
"entity_type" PACKAGE_TYPE = "package" KEY_CHARS = string.digits + string.letters + "_-" @@ -102,8 +106,19 @@ def update_dict(self, pkg_dict, defer_commit=False): def index_package(self, pkg_dict, defer_commit=False): if pkg_dict is None: return + pkg_dict['data_dict'] = json.dumps(pkg_dict) + if config.get('ckan.cache_validated_datasets', True): + package_plugin = lib_plugins.lookup_package_plugin( + pkg_dict.get('type')) + + schema = package_plugin.show_package_schema() + validated_pkg_dict, errors = _validate(pkg_dict, schema, { + 'model': model, 'session': model.Session}) + pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict, + cls=ckan.lib.navl.dictization_functions.MissingNullEncoder) + # add to string field for sorting title = pkg_dict.get('title') if title: diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 21ab4261607..881ddc96938 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -279,11 +279,12 @@ def get_index(self,reference): data = json.loads(solr_response) if data['response']['numFound'] == 0: - raise SearchError('Dataset not found in the search index: %s' % reference) + raise SearchError('Dataset not found in the search index: %s' % reference) else: return data['response']['docs'][0] except Exception, e: - log.exception(e) + if not isinstance(e, SearchError): + log.exception(e) raise SearchError(e) finally: conn.close() diff --git a/ckan/logic/action/create.py b/ckan/logic/action/create.py index 1fc0fb6e058..fcb55ceae64 100644 --- a/ckan/logic/action/create.py +++ b/ckan/logic/action/create.py @@ -155,8 +155,9 @@ def package_create(context, data_dict): admins = [] if user: user_obj = model.User.by_name(user.decode('utf8')) - admins = [user_obj] - data['creator_user_id'] = user_obj.id + if user_obj: + admins = [user_obj] + data['creator_user_id'] = user_obj.id pkg = model_save.package_dict_save(data, context) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 
51258796df3..7c9cdfddace 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -4,6 +4,7 @@ import logging import json import datetime +import socket from pylons import config import sqlalchemy @@ -64,6 +65,13 @@ def site_read(context,data_dict=None): def package_list(context, data_dict): '''Return a list of the names of the site's datasets (packages). + :param limit: if given, the list of datasets will be broken into pages of + at most ``limit`` datasets per page and only one page will be returned + at a time (optional) + :type limit: int + :param offset: when ``limit`` is given, the offset to start returning packages from + :type offset: int + :rtype: list of strings ''' @@ -72,6 +80,12 @@ def package_list(context, data_dict): _check_access('package_list', context, data_dict) + schema = context.get('schema', logic.schema.default_pagination_schema()) + data_dict, errors = _validate(data_dict, schema, context) + if errors: + raise ValidationError(errors) + + package_revision_table = model.package_revision_table col = (package_revision_table.c.id if api == 2 else package_revision_table.c.name) @@ -79,6 +93,14 @@ def package_list(context, data_dict): query = query.where(_and_(package_revision_table.c.state=='active', package_revision_table.c.current==True)) query = query.order_by(col) + + limit = data_dict.get('limit') + if limit: + query = query.limit(limit) + + offset = data_dict.get('offset') + if offset: + query = query.offset(offset) return list(zip(*query.execute())[0]) def current_package_list_with_resources(context, data_dict): @@ -747,19 +769,54 @@ def package_show(context, data_dict): _check_access('package_show', context, data_dict) - package_dict = model_dictize.package_dictize(pkg, context) + package_dict = None + use_cache = (context.get('use_cache', True) + and not 'revision_id' in context + and not 'revision_date' in context) + if use_cache: + try: + search_result = search.show(name_or_id) + except (search.SearchError, socket.error): 
+ pass + else: + use_validated_cache = 'schema' not in context + if use_validated_cache and 'validated_data_dict' in search_result: + package_dict = json.loads(search_result['validated_data_dict']) + package_dict_validated = True + else: + package_dict = json.loads(search_result['data_dict']) + package_dict_validated = False + metadata_modified = pkg.metadata_modified.isoformat() + search_metadata_modified = search_result['metadata_modified'] + # solr stores less precise datetime, + # truncate to 22 characters to get good enough match + if metadata_modified[:22] != search_metadata_modified[:22]: + package_dict = None + + if not package_dict: + package_dict = model_dictize.package_dictize(pkg, context) + package_dict_validated = False + + if context.get('for_view'): + for item in plugins.PluginImplementations(plugins.IPackageController): + package_dict = item.before_view(package_dict) for item in plugins.PluginImplementations(plugins.IPackageController): item.read(pkg) - package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) - if 'schema' in context: - schema = context['schema'] - else: - schema = package_plugin.show_package_schema() + for resource_dict in package_dict['resources']: + for item in plugins.PluginImplementations(plugins.IResourceController): + resource_dict = item.before_show(resource_dict) - if schema and context.get('validate', True): - package_dict, errors = _validate(package_dict, schema, context=context) + if not package_dict_validated: + package_plugin = lib_plugins.lookup_package_plugin(package_dict['type']) + if 'schema' in context: + schema = context['schema'] + else: + schema = package_plugin.show_package_schema() + if schema and context.get('validate', True): + package_dict, errors = _validate(package_dict, schema, + context=context) for item in plugins.PluginImplementations(plugins.IPackageController): item.after_show(context, package_dict) @@ -786,7 +843,12 @@ def resource_show(context, data_dict): raise NotFound 
_check_access('resource_show', context, data_dict) - return model_dictize.resource_dictize(resource, context) + resource_dict = model_dictize.resource_dictize(resource, context) + + for item in plugins.PluginImplementations(plugins.IResourceController): + resource_dict = item.before_show(resource_dict) + + return resource_dict def resource_status_show(context, data_dict): '''Return the statuses of a resource's tasks. @@ -850,6 +912,10 @@ def _group_or_org_show(context, data_dict, is_org=False): if group is None: raise NotFound + if is_org and not group.is_organization: + raise NotFound + if not is_org and group.is_organization: + raise NotFound if is_org: _check_access('organization_show',context, data_dict) @@ -2119,8 +2185,7 @@ def activity_detail_list(context, data_dict): # authorized to read. model = context['model'] activity_id = _get_or_bust(data_dict, 'id') - activity_detail_objects = model.Session.query( - model.activity.ActivityDetail).filter_by(activity_id=activity_id).all() + activity_detail_objects = model.ActivityDetail.by_activity_id(activity_id) return model_dictize.activity_detail_list_dictize(activity_detail_objects, context) diff --git a/ckan/logic/auth/create.py b/ckan/logic/auth/create.py index bf9c3d17ea3..e96b789bcf5 100644 --- a/ckan/logic/auth/create.py +++ b/ckan/logic/auth/create.py @@ -4,10 +4,10 @@ from ckan.common import _ -@logic.auth_sysadmins_check def package_create(context, data_dict=None): user = context['user'] - if not new_authz.auth_is_registered_user(): + + if new_authz.auth_is_anon_user(context): check1 = new_authz.check_config_permission('anon_create_dataset') else: check1 = new_authz.check_config_permission('create_dataset_if_not_in_organization') \ @@ -32,7 +32,7 @@ def package_create(context, data_dict=None): def file_upload(context, data_dict=None): user = context['user'] - if not new_authz.auth_is_registered_user(): + if new_authz.auth_is_anon_user(context): return {'success': False, 'msg': _('User %s not authorized to 
create packages') % user} return {'success': True} diff --git a/ckan/logic/auth/update.py b/ckan/logic/auth/update.py index 59ec935a52a..5f8f4b03c25 100644 --- a/ckan/logic/auth/update.py +++ b/ckan/logic/auth/update.py @@ -23,7 +23,7 @@ def package_update(context, data_dict): ) else: # If dataset is not owned then we can edit if config permissions allow - if new_authz.auth_is_registered_user(): + if not new_authz.auth_is_anon_user(context): check1 = new_authz.check_config_permission( 'create_dataset_if_not_in_organization') else: diff --git a/ckan/model/activity.py b/ckan/model/activity.py index 5ab31a27514..04edaf6819c 100644 --- a/ckan/model/activity.py +++ b/ckan/model/activity.py @@ -2,6 +2,7 @@ from sqlalchemy import orm, types, Column, Table, ForeignKey, desc, or_ +import ckan.model import meta import types as _types import domain_object @@ -62,6 +63,11 @@ def __init__(self, activity_id, object_id, object_type, activity_type, else: self.data = data + @classmethod + def by_activity_id(cls, activity_id): + return ckan.model.Session.query(cls) \ + .filter_by(activity_id = activity_id).all() + meta.mapper(ActivityDetail, activity_detail_table, properties = { 'activity':orm.relation ( Activity, backref=orm.backref('activity_detail')) diff --git a/ckan/new_authz.py b/ckan/new_authz.py index 01fb07927ac..00a7dfe0e17 100644 --- a/ckan/new_authz.py +++ b/ckan/new_authz.py @@ -9,6 +9,8 @@ import ckan.model as model from ckan.common import OrderedDict, _, c +import ckan.lib.maintain as maintain + log = getLogger(__name__) @@ -334,11 +336,34 @@ def check_config_permission(permission): return CONFIG_PERMISSIONS[permission] return False - +@maintain.deprecated('Use auth_is_loggedin_user instead') def auth_is_registered_user(): + ''' + This function is deprecated, please use the auth_is_loggedin_user instead + ''' + return auth_is_loggedin_user() + +def auth_is_loggedin_user(): ''' Do we have a logged in user ''' try: context_user = c.user except TypeError: context_user = 
None return bool(context_user) + +def auth_is_anon_user(context): + ''' Is this an anonymous user? + eg Not logged in if a web request and not user defined in context + if logic functions called directly + + See ckan/lib/base.py:232 for pylons context object logic + ''' + try: + is_anon_user = (not bool(c.user) and bool(c.author)) + except TypeError: + # No c object set, this is not a call done via the web interface, + # but directly, eg from an extension + context_user = context.get('user') + is_anon_user = not bool(context_user) + + return is_anon_user diff --git a/ckan/plugins/core.py b/ckan/plugins/core.py index 62290d38503..8f3752117ea 100644 --- a/ckan/plugins/core.py +++ b/ckan/plugins/core.py @@ -237,15 +237,13 @@ def _get_service(plugin_name): if isinstance(plugin_name, basestring): for group in GROUPS: - try: - (plugin,) = iter_entry_points( - group=group, - name=plugin_name - ) + iterator = iter_entry_points( + group=group, + name=plugin_name + ) + plugin = next(iterator, None) + if plugin: return plugin.load()(name=plugin_name) - except ValueError: - pass - else: - raise PluginNotFoundException(plugin_name) + raise PluginNotFoundException(plugin_name) else: raise TypeError('Expected a plugin name', plugin_name) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 8acfc34aad6..aa08fc64bab 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -14,6 +14,7 @@ 'IConfigurable', 'IConfigurer', 'IActions', 'IResourceUrlChange', 'IDatasetForm', 'IResourcePreview', + 'IResourceController', 'IGroupForm', 'ITagController', 'ITemplateHelpers', @@ -195,33 +196,40 @@ def notify(self, resource): class IResourcePreview(Interface): - """ - Hook into the resource previews in helpers.py. This lets you - create custom previews for example for xml files. - """ + '''Add custom data previews for resource file-types. + ''' def can_preview(self, data_dict): - ''' - Returns info on whether the plugin can preview the resource. 
+ '''Return info on whether the plugin can preview the resource. - This can be done in two ways. - The old way is to just return True or False. - The new way is to return a dict with the following - { - 'can_preview': bool - if the extension can preview the resource - 'fixable': string - if the extension cannot preview but could for - example if the resource_proxy was enabled. - 'quality': int - how good the preview is 1-poor, 2-average, 3-good - used if multiple extensions can preview - } + This can be done in two ways: - The ``data_dict`` contains the resource and the package. + 1. The old way is to just return ``True`` or ``False``. + + 2. The new way is to return a dict with three keys: + + ``'can_preview'`` (``boolean``) + ``True`` if the extension can preview the resource. + + ``'fixable'`` (``string``) + A string explaining how preview for the resource could be enabled, + for example if the ``resource_proxy`` plugin was enabled. + + ``'quality'`` (``int``) + How good the preview is: ``1`` (poor), ``2`` (average) or + ``3`` (good). When multiple preview extensions can preview the + same resource, this is used to determine which extension will + be used. + + :param data_dict: the resource to be previewed and the dataset that it + belongs to. + :type data_dict: dictionary + + Make sure to check the ``on_same_domain`` value of the resource or the + url if your preview requires the resource to be on the same domain + because of the same-origin policy. To find out how to preview + resources that are on a different domain, read :ref:`resource-proxy`. - Make sure to ckeck the ``on_same_domain`` value of the - resource or the url if your preview requires the resource to be on - the same domain because of the same origin policy. - To find out how to preview resources that are on a - different domain, read :ref:`resource_proxy`. 
''' def setup_template_variables(self, context, data_dict): @@ -432,6 +440,20 @@ def before_view(self, pkg_dict): return pkg_dict +class IResourceController(Interface): + """ + Hook into the resource controller. + (see IGroupController) + """ + + def before_show(self, resource_dict): + ''' + Extensions will receive the validated data dict before the resource + is ready for display. + ''' + return resource_dict + + class IPluginObserver(Interface): """ Plugin to the plugin loading mechanism diff --git a/ckan/templates/group/snippets/group_form.html b/ckan/templates/group/snippets/group_form.html index f8b45f2f3d9..6d4d3216b26 100644 --- a/ckan/templates/group/snippets/group_form.html +++ b/ckan/templates/group/snippets/group_form.html @@ -7,7 +7,7 @@ {% block basic_fields %} {% set attrs = {'data-module': 'slug-preview-target'} %} - {{ form.input('title', label=_('Title'), id='field-title', placeholder=_('My Group'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} + {{ form.input('title', label=_('Name'), id='field-name', placeholder=_('My Group'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} {# Perhaps these should be moved into the controller? 
#} {% set prefix = h.url_for(controller='group', action='read', id='') %} diff --git a/ckan/templates/organization/snippets/organization_form.html b/ckan/templates/organization/snippets/organization_form.html index 70eb35de227..7b05e7d1542 100644 --- a/ckan/templates/organization/snippets/organization_form.html +++ b/ckan/templates/organization/snippets/organization_form.html @@ -7,7 +7,7 @@ {% block basic_fields %} {% set attrs = {'data-module': 'slug-preview-target'} %} - {{ form.input('title', label=_('Title'), id='field-title', placeholder=_('My Organization'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} + {{ form.input('title', label=_('Name'), id='field-name', placeholder=_('My Organization'), value=data.title, error=errors.title, classes=['control-full'], attrs=attrs) }} {# Perhaps these should be moved into the controller? #} {% set prefix = h.url_for(controller='organization', action='read', id='') %} diff --git a/ckan/templates/snippets/activity_item.html b/ckan/templates/snippets/activity_item.html index 1923dee083c..e2bf699a7f4 100644 --- a/ckan/templates/snippets/activity_item.html +++ b/ckan/templates/snippets/activity_item.html @@ -5,6 +5,6 @@

{{ h.literal(activity.msg.format(**activity.data)) }} - {{ h.time_ago_in_words_from_str(activity.timestamp, 'hour') }} ago + {{ h.time_ago_from_timestamp(activity.timestamp) }}

- \ No newline at end of file + diff --git a/ckan/tests/functional/test_package.py b/ckan/tests/functional/test_package.py index 1cff46110ec..662ad0c2458 100644 --- a/ckan/tests/functional/test_package.py +++ b/ckan/tests/functional/test_package.py @@ -1083,7 +1083,7 @@ def test_new(self): prefix = '' fv[prefix + 'name'] = 'annakarenina' self.pkg_names.append('annakarenina') - res = fv.submit('save') + res = fv.submit('save', extra_environ=self.extra_environ_tester) assert not 'Error' in res, res def test_new_bad_name(self): diff --git a/ckan/tests/logic/test_action.py b/ckan/tests/logic/test_action.py index 73a3b838f0c..4e496f7899c 100644 --- a/ckan/tests/logic/test_action.py +++ b/ckan/tests/logic/test_action.py @@ -60,8 +60,8 @@ def _add_basic_package(self, package_name=u'test_package', **kwargs): return json.loads(res.body)['result'] def test_01_package_list(self): - postparams = '%s=1' % json.dumps({}) - res = json.loads(self.app.post('/api/action/package_list', params=postparams).body) + res = json.loads(self.app.post('/api/action/package_list', + headers={'content-type': 'application/json'}).body) assert res['success'] is True assert len(res['result']) == 2 assert 'warandpeace' in res['result'] @@ -69,6 +69,13 @@ def test_01_package_list(self): assert res['help'].startswith( "Return a list of the names of the site's datasets (packages).") + postparams = '%s=1' % json.dumps({'limit': 1}) + res = json.loads(self.app.post('/api/action/package_list', + params=postparams).body) + assert res['success'] is True + assert len(res['result']) == 1 + assert 'warandpeace' in res['result'] or 'annakarenina' in res['result'] + # Test GET request res = json.loads(self.app.get('/api/action/package_list').body) assert len(res['result']) == 2 @@ -1644,3 +1651,57 @@ def test_02_bulk_delete(self): res = self.app.get('/api/action/package_search?q=*:*') assert json.loads(res.body)['result']['count'] == 0 + + +class TestGroupOrgView(WsgiAppCase): + + @classmethod + def 
setup_class(cls): + model.Session.add_all([ + model.User(name=u'sysadmin', apikey=u'sysadmin', + password=u'sysadmin', sysadmin=True), + ]) + model.Session.commit() + + org_dict = '%s=1' % json.dumps({ + 'name': 'org', + }) + res = cls.app.post('/api/action/organization_create', + extra_environ={'Authorization': 'sysadmin'}, + params=org_dict) + cls.org_id = json.loads(res.body)['result']['id'] + + group_dict = '%s=1' % json.dumps({ + 'name': 'group', + }) + res = cls.app.post('/api/action/group_create', + extra_environ={'Authorization': 'sysadmin'}, + params=group_dict) + cls.group_id = json.loads(res.body)['result']['id'] + + @classmethod + def teardown_class(self): + model.repo.rebuild_db() + + def test_1_view_org(self): + res = self.app.get('/api/action/organization_show', + params={'id': self.org_id}) + res_json = json.loads(res.body) + assert res_json['success'] is True + + res = self.app.get('/api/action/group_show', + params={'id': self.org_id}, expect_errors=True) + res_json = json.loads(res.body) + assert res_json['success'] is False + + def test_2_view_group(self): + res = self.app.get('/api/action/group_show', + params={'id': self.group_id}) + res_json = json.loads(res.body) + assert res_json['success'] is True + + res = self.app.get('/api/action/organization_show', + params={'id': self.group_id}, expect_errors=True) + res_json = json.loads(res.body) + assert res_json['success'] is False + diff --git a/ckan/tests/models/test_activity.py b/ckan/tests/models/test_activity.py new file mode 100644 index 00000000000..2aa020987fb --- /dev/null +++ b/ckan/tests/models/test_activity.py @@ -0,0 +1,16 @@ +import ckan.model as model + +Activity = model.Activity +ActivityDetail = model.ActivityDetail + + +class TestActivityDetail(object): + def test_by_activity_id(self): + activity = Activity('user-id', 'object-id', + 'revision-id', 'activity-type') + activity.save() + activity_detail = ActivityDetail(activity.id, 'object-id', + 'object-type', 'activity-type') + 
activity_detail.save() + activities = ActivityDetail.by_activity_id(activity.id) + assert activities == [activity_detail], activity_detail diff --git a/ckan/tests/test_plugins.py b/ckan/tests/test_plugins.py index b18810cf213..f8a5d9929b0 100644 --- a/ckan/tests/test_plugins.py +++ b/ckan/tests/test_plugins.py @@ -164,3 +164,7 @@ def test_auth_plugin_override(self): with plugins.use_plugin('auth_plugin'): assert new_authz.is_authorized('package_list', {}) != package_list_original assert new_authz.is_authorized('package_list', {}) == package_list_original + + @raises(plugins.PluginNotFoundException) + def test_inexistent_plugin_loading(self): + plugins.load('inexistent-plugin') diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 9fad955756f..47a464518ce 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -1,6 +1,10 @@ import logging -import pylons +import json +import urlparse +import datetime +import pylons +import requests import sqlalchemy import ckan.lib.navl.dictization_functions @@ -25,10 +29,20 @@ def datastore_create(context, data_dict): times to initially insert more data, add fields, change the aliases or indexes as well as the primary keys. + To create an empty datastore resource and a CKAN resource at the same time, + provide ``resource`` with a valid ``package_id`` and omit the ``resource_id``. + + If you want to create a datastore resource from the content of a file, + provide ``resource`` with a valid ``url``. + See :ref:`fields` and :ref:`records` for details on how to lay out records. :param resource_id: resource id that the data is going to be stored against. :type resource_id: string + :param resource: resource dictionary that is passed to + :meth:`~ckan.logic.action.create.resource_create`. + Use instead of ``resource_id`` (optional) + :type resource: dictionary :param aliases: names for read only aliases of the resource. 
(optional) :type aliases: list or comma separated string :param fields: fields/columns and their extra metadata. (optional) @@ -53,14 +67,46 @@ def datastore_create(context, data_dict): ''' schema = context.get('schema', dsschema.datastore_create_schema()) records = data_dict.pop('records', None) + resource = data_dict.pop('resource', None) data_dict, errors = _validate(data_dict, schema, context) if records: data_dict['records'] = records + if resource: + data_dict['resource'] = resource if errors: raise p.toolkit.ValidationError(errors) p.toolkit.check_access('datastore_create', context, data_dict) + if 'resource' in data_dict and 'resource_id' in data_dict: + raise p.toolkit.ValidationError({ + 'resource': ['resource cannot be used with resource_id'] + }) + + if not 'resource' in data_dict and not 'resource_id' in data_dict: + raise p.toolkit.ValidationError({ + 'resource_id': ['resource_id or resource required'] + }) + + if 'resource' in data_dict: + has_url = 'url' in data_dict['resource'] + data_dict['resource'].setdefault('url', '_tmp') + res = p.toolkit.get_action('resource_create')(context, + data_dict['resource']) + data_dict['resource_id'] = res['id'] + + # create resource from file + if has_url: + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': res['id'], + 'set_url_to_dump': True + }) + # create empty resource + else: + # no need to set the full url because it will be set in before_show + res['url_type'] = 'datastore' + p.toolkit.get_action('resource_update')(context, res) + data_dict['connection_url'] = pylons.config['ckan.datastore.write_url'] # validate aliases @@ -295,7 +341,7 @@ def datastore_search_sql(context, data_dict): this action. Use :meth:`~ckanext.datastore.logic.action.datastore_search` instead. .. note:: This action is only available when using PostgreSQL 9.X and using a read-only user on the database. - It is not available in :ref:`legacy mode`. + It is not available in :ref:`legacy mode`. 
:param sql: a single SQL select statement :type sql: string @@ -338,7 +384,7 @@ def datastore_make_private(context, data_dict): private or a new DataStore table is created for a CKAN resource that belongs to a private dataset. - :param resource_id: if of resource that should become private + :param resource_id: id of resource that should become private :type resource_id: string ''' if 'id' in data_dict: @@ -383,6 +429,127 @@ def datastore_make_public(context, data_dict): db.make_public(context, data_dict) +def datapusher_submit(context, data_dict): + ''' Submit a job to the datapusher. The datapusher is a service that + imports tabular data into the datastore. + + :param resource_id: The resource id of the resource that the data + should be imported in. The resource's URL will be used to get the data. + :type resource_id: string + :param set_url_type: If set to true, the ``url_type`` of the resource will + be set to ``datastore`` and the resource URL will automatically point + to the :ref:`datastore dump ` URL. (optional, default: False) + :type set_url_type: boolean + + Returns ``True`` if the job has been submitted and ``False`` if the job + has not been submitted, i.e. when the datapusher is not configured. 
+ + :rtype: boolean + ''' + + if 'id' in data_dict: + data_dict['resource_id'] = data_dict['id'] + res_id = _get_or_bust(data_dict, 'resource_id') + + p.toolkit.check_access('datapusher_submit', context, data_dict) + + datapusher_url = pylons.config.get('datapusher.url') + + # no datapusher url means the datapusher should not be used + if not datapusher_url: + return False + + callback_url = p.toolkit.url_for( + controller='api', action='action', logic_function='datapusher_hook', + ver=3, qualified=True) + + user = p.toolkit.get_action('user_show')(context, {'id': context['user']}) + try: + r = requests.post( + urlparse.urljoin(datapusher_url, 'job'), + headers={ + 'Content-Type': 'application/json' + }, + data=json.dumps({ + 'api_key': user['apikey'], + 'job_type': 'push_to_datastore', + 'result_url': callback_url, + 'metadata': { + 'ckan_url': pylons.config['ckan.site_url'], + 'resource_id': res_id, + 'set_url_type': data_dict.get('set_url_type', False) + } + })) + r.raise_for_status() + except requests.exceptions.ConnectionError, e: + raise p.toolkit.ValidationError({'datapusher': { + 'message': 'Could not connect to DataPusher.', + 'details': str(e)}}) + except requests.exceptions.HTTPError, e: + m = 'An Error occurred while sending the job: {0}'.format(e.message) + try: + body = e.response.json() + except ValueError: + body = e.response.text + raise p.toolkit.ValidationError({'datapusher': { + 'message': m, + 'details': body, + 'status_code': r.status_code}}) + + empty_task = { + 'entity_id': res_id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending' + } + + tasks = [] + for (k, v) in [('job_id', r.json()['job_id']), + ('job_key', r.json()['job_key'])]: + t = empty_task.copy() + t['key'] = k + t['value'] = v + tasks.append(t) + p.toolkit.get_action('task_status_update_many')(context, {'data': tasks}) + + return True + + +def datapusher_hook(context, data_dict): + """ Update datapusher 
task. This action is typically called by the + datapusher whenever the status of a job changes. + + Expects a job with ``status`` and ``metadata`` with a ``resource_id``. + """ + + # TODO: use a schema to validate + + p.toolkit.check_access('datapusher_submit', context, data_dict) + + res_id = data_dict['metadata']['resource_id'] + + task_id = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': res_id, + 'task_type': 'datapusher', + 'key': 'job_id' + }) + + task_key = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': res_id, + 'task_type': 'datapusher', + 'key': 'job_key' + }) + + tasks = [task_id, task_key] + + for task in tasks: + task['state'] = data_dict['status'] + task['last_updated'] = str(datetime.datetime.now()) + + p.toolkit.get_action('task_status_update_many')(context, {'data': tasks}) + + def _resource_exists(context, data_dict): # Returns true if the resource exists in CKAN and in the datastore model = _get_or_bust(context, 'model') diff --git a/ckanext/datastore/logic/auth.py b/ckanext/datastore/logic/auth.py index 641eb5cdddf..4d044755035 100644 --- a/ckanext/datastore/logic/auth.py +++ b/ckanext/datastore/logic/auth.py @@ -34,5 +34,9 @@ def datastore_search(context, data_dict): return _datastore_auth(context, data_dict, 'resource_show') +def datapusher_submit(context, data_dict): + return _datastore_auth(context, data_dict) + + def datastore_change_permissions(context, data_dict): return _datastore_auth(context, data_dict) diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index 3c4e0d95986..018eb249d79 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -8,6 +8,7 @@ not_missing = get_validator('not_missing') not_empty = get_validator('not_empty') resource_id_exists = get_validator('resource_id_exists') +package_id_exists = get_validator('package_id_exists') ignore_missing = get_validator('ignore_missing') empty = get_validator('empty') 
boolean_validator = get_validator('boolean_validator') @@ -66,7 +67,7 @@ def json_validator(value, context): def datastore_create_schema(): schema = { - 'resource_id': [not_missing, unicode, resource_id_exists], + 'resource_id': [ignore_missing, unicode, resource_id_exists], 'id': [ignore_missing], 'aliases': [ignore_missing, list_of_strings_or_string], 'fields': { diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 1c3175d665d..4477ad70526 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -11,6 +11,8 @@ log = logging.getLogger(__name__) _get_or_bust = logic.get_or_bust +DEFAULT_FORMATS = [] + class DatastoreException(Exception): pass @@ -20,8 +22,10 @@ class DatastorePlugin(p.SingletonPlugin): p.implements(p.IConfigurable, inherit=True) p.implements(p.IActions) p.implements(p.IAuthFunctions) + p.implements(p.IResourceUrlChange) p.implements(p.IDomainObjectModification, inherit=True) p.implements(p.IRoutes, inherit=True) + p.implements(p.IResourceController, inherit=True) legacy_mode = False resource_show_action = None @@ -38,6 +42,9 @@ def configure(self, config): # datastore runs on PG prior to 9.0 (for example 8.4). self.legacy_mode = 'ckan.datastore.read_url' not in self.config + datapusher_formats = config.get('datapusher.formats', '').split() + self.datapusher_formats = datapusher_formats or DEFAULT_FORMATS + # Check whether we are running one of the paster commands which means # that we should ignore the following tests. 
if sys.argv[0].split('/')[-1] == 'paster' and 'datastore' in sys.argv[1:]: @@ -94,12 +101,37 @@ def new_resource_show(context, data_dict): self.resource_show_action = new_resource_show - def notify(self, entity, operation): + def notify(self, entity, operation=None): + ''' + if not isinstance(entity, model.Resource): + return + if operation: + if operation == model.domain_object.DomainObjectOperation.new: + self._create_datastorer_task(entity) + else: + # if operation is None, resource URL has been changed, as the + # notify function in IResourceUrlChange only takes 1 parameter + self._create_datastorer_task(entity) + ''' + context = {'model': model, 'ignore_auth': True} + if isinstance(entity, model.Resource): + if (operation == model.domain_object.DomainObjectOperation.new + or not operation): + # if operation is None, resource URL has been changed, as + # the notify function in IResourceUrlChange only takes + # 1 parameter + package = p.toolkit.get_action('package_show')(context, { + 'id': entity.get_package_id() + }) + if (not package['private'] and + entity.format in self.datapusher_formats): + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': entity.id + }) if not isinstance(entity, model.Package) or self.legacy_mode: return # if a resource is new, it cannot have a datastore resource, yet if operation == model.domain_object.DomainObjectOperation.changed: - context = {'model': model, 'ignore_auth': True} if entity.private: func = p.toolkit.get_action('datastore_make_private') else: @@ -224,6 +256,8 @@ def get_actions(self): 'datastore_upsert': action.datastore_upsert, 'datastore_delete': action.datastore_delete, 'datastore_search': action.datastore_search, + 'datapusher_submit': action.datapusher_submit, + 'datapusher_hook': action.datapusher_hook, 'resource_show': self.resource_show_action, } if not self.legacy_mode: @@ -238,10 +272,21 @@ def get_auth_functions(self): 'datastore_upsert': auth.datastore_upsert, 'datastore_delete': 
auth.datastore_delete, 'datastore_search': auth.datastore_search, - 'datastore_change_permissions': auth.datastore_change_permissions} + 'datastore_change_permissions': auth.datastore_change_permissions, + 'datapusher_submit': auth.datapusher_submit} def before_map(self, m): m.connect('/datastore/dump/{resource_id}', controller='ckanext.datastore.controller:DatastoreController', action='dump') return m + + def before_show(self, resource_dict): + ''' Modify the resource url of datastore resources so that + they link to the datastore dumps. + ''' + if resource_dict['url_type'] == 'datastore': + resource_dict['url'] = p.toolkit.url_for( + controller='ckanext.datastore.controller:DatastoreController', + action='dump', resource_id=resource_dict['id']) + return resource_dict diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index c657f6e1ed1..d7b893235ed 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -1,25 +1,40 @@ import json +import httpretty import nose +import sys +import datetime from nose.tools import assert_equal import pylons +from pylons import config import sqlalchemy.orm as orm +import paste.fixture import ckan.plugins as p import ckan.lib.create_test_data as ctd import ckan.model as model import ckan.tests as tests +import ckan.config.middleware as middleware import ckanext.datastore.db as db from ckanext.datastore.tests.helpers import rebuild_all_dbs +# avoid hanging tests https://github.com/gabrielfalcao/HTTPretty/issues/34 +if sys.version_info < (2, 7, 0): + import socket + socket.setdefaulttimeout(1) + + class TestDatastoreCreate(tests.WsgiAppCase): sysadmin_user = None normal_user = None @classmethod def setup_class(cls): + + wsgiapp = middleware.make_app(config['global_conf'], **config) + cls.app = paste.fixture.TestApp(wsgiapp) if not tests.is_datastore_supported(): raise nose.SkipTest("Datastore not supported") p.load('datastore') @@ -521,6 +536,144 @@ def 
test_create_basic(self): assert res_dict['success'] is True, res_dict + def test_create_ckan_resource_in_package(self): + package = model.Package.get('annakarenina') + data = { + 'resource': {'package_id': package.id} + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_create', params=postparams, + extra_environ=auth, status=200) + res_dict = json.loads(res.body) + + assert 'resource_id' in res_dict['result'] + assert len(model.Package.get('annakarenina').resources) == 3 + + res = tests.call_action_api( + self.app, 'resource_show', id=res_dict['result']['resource_id']) + assert res['url'] == '/datastore/dump/' + res['id'], res + + @httpretty.activate + def test_providing_res_with_url_calls_datapusher_correctly(self): + pylons.config['datapusher.url'] = 'http://datapusher.ckan.org' + httpretty.HTTPretty.register_uri( + httpretty.HTTPretty.POST, + 'http://datapusher.ckan.org/job', + content_type='application/json', + body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) + + package = model.Package.get('annakarenina') + + tests.call_action_api( + self.app, 'datastore_create', apikey=self.sysadmin_user.apikey, + resource=dict(package_id=package.id, url='demo.ckan.org')) + + assert len(package.resources) == 4, len(package.resources) + resource = package.resources[3] + data = json.loads(httpretty.last_request().body) + assert data['metadata']['resource_id'] == resource.id, data + assert data['result_url'].endswith('/action/datapusher_hook'), data + assert data['result_url'].startswith('http://'), data + + def test_cant_provide_resource_and_resource_id(self): + package = model.Package.get('annakarenina') + resource = package.resources[0] + data = { + 'resource_id': resource.id, + 'resource': {'package_id': package.id} + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datastore_create', 
params=postparams, + extra_environ=auth, status=409) + res_dict = json.loads(res.body) + + assert res_dict['error']['__type'] == 'Validation Error' + + @httpretty.activate + def test_send_datapusher_creates_task(self): + httpretty.HTTPretty.register_uri( + httpretty.HTTPretty.POST, + 'http://datapusher.ckan.org/job', + content_type='application/json', + body=json.dumps({'job_id': 'foo', 'job_key': 'bar'})) + + package = model.Package.get('annakarenina') + resource = package.resources[0] + + context = { + 'ignore_auth': True, + 'user': self.sysadmin_user.name + } + + p.toolkit.get_action('datapusher_submit')(context, { + 'resource_id': resource.id + }) + + task = p.toolkit.get_action('task_status_show')(context, { + 'entity_id': resource.id, + 'task_type': 'datapusher', + 'key': 'job_id' + }) + + assert task['state'] == 'pending', task + + def test_datapusher_hook(self): + package = model.Package.get('annakarenina') + resource = package.resources[0] + + context = { + 'user': self.sysadmin_user.name + } + + p.toolkit.get_action('task_status_update')(context, { + 'entity_id': resource.id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'key': 'job_id', + 'value': 'my_id', + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending' + }) + + p.toolkit.get_action('task_status_update')(context, { + 'entity_id': resource.id, + 'entity_type': 'resource', + 'task_type': 'datapusher', + 'key': 'job_key', + 'value': 'my_key', + 'last_updated': str(datetime.datetime.now()), + 'state': 'pending' + }) + + data = { + 'status': 'success', + 'metadata': { + 'resource_id': resource.id + } + } + postparams = '%s=1' % json.dumps(data) + auth = {'Authorization': str(self.sysadmin_user.apikey)} + res = self.app.post('/api/action/datapusher_hook', params=postparams, + extra_environ=auth, status=200) + print res.body + res_dict = json.loads(res.body) + + assert res_dict['success'] is True + + task = tests.call_action_api( + self.app, 'task_status_show', 
entity_id=resource.id, + task_type='datapusher', key='job_id') + + assert task['state'] == 'success', task + + task = tests.call_action_api( + self.app, 'task_status_show', entity_id=resource.id, + task_type='datapusher', key='job_key') + + assert task['state'] == 'success', task + def test_guess_types(self): resource = model.Package.get('annakarenina').resources[1] diff --git a/ckanext/multilingual/solr/schema.xml b/ckanext/multilingual/solr/schema.xml index 82f3e40769b..10f08696681 100644 --- a/ckanext/multilingual/solr/schema.xml +++ b/ckanext/multilingual/solr/schema.xml @@ -436,6 +436,7 @@ + diff --git a/ckanext/resourceproxy/controller.py b/ckanext/resourceproxy/controller.py index 81a8123e3b1..0e8fd9bfd11 100644 --- a/ckanext/resourceproxy/controller.py +++ b/ckanext/resourceproxy/controller.py @@ -1,4 +1,5 @@ from logging import getLogger +import urlparse import requests @@ -15,13 +16,17 @@ def proxy_resource(context, data_dict): ''' Chunked proxy for resources. To make sure that the file is not too large, first, we try to get the content length from the headers. If the headers to not contain a content length (if it is a chinked - response), we only transfer as long as the transfered data is less + response), we only transfer as long as the transferred data is less than the maximum file size. 
''' resource_id = data_dict['resource_id'] log.info('Proxify resource {id}'.format(id=resource_id)) resource = logic.get_action('resource_show')(context, {'id': resource_id}) url = resource['url'] + parts = urlparse.urlsplit(url) + if not parts.scheme or not parts.netloc: + base.abort(409, detail='Invalid URL.') + try: # first we try a HEAD request which may not be supported did_get = False diff --git a/ckanext/resourceproxy/tests/test_proxy.py b/ckanext/resourceproxy/tests/test_proxy.py index 3d2d6091b54..52b96f373ff 100644 --- a/ckanext/resourceproxy/tests/test_proxy.py +++ b/ckanext/resourceproxy/tests/test_proxy.py @@ -129,7 +129,17 @@ def test_large_file_streaming(self): assert result.status == 409, result.status assert 'too large' in result.body, result.body - def test_resource_proxy_non_existent(self): + @httpretty.activate + def test_invalid_url(self): + self.data_dict = set_resource_url('javascript:downloadFile(foo)') + + proxied_url = proxy.get_proxified_resource_url(self.data_dict) + result = self.app.get(proxied_url, status='*') + assert result.status == 409, result.status + assert 'Invalid URL' in result.body, result.body + + + def test_non_existent_url(self): self.data_dict = set_resource_url('http://foo.bar') def f1(): diff --git a/dev-requirements.txt b/dev-requirements.txt index a445712d239..2c1c6cb5041 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,9 +1,9 @@ -# These are packages that required when running ckan tests +# These are packages that are required when running ckan tests and building the docs -e git+https://github.com/okfn/ckanclient@a315a72eef74dda4831acd022ef84a1246803c73#egg=ckanclient-dev docutils==0.8.1 httpretty==0.6.2 -nose==1.3.0 +# nose==1.3.0 # already in requirements.txt pep8==1.4.6 -Sphinx==1.2b1 +Sphinx==1.1.3 polib==1.0.3 diff --git a/doc/conf.py b/doc/conf.py index 06cc6161f3b..e2443cc94b6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -84,7 +84,7 @@ # General information about the project.
project = u'CKAN Documentation' project_short_name = u'CKAN' -copyright = u'''© 2009-2012, Open Knowledge Foundation. +copyright = u'''© 2009-2013, Open Knowledge Foundation. Licensed under Creative Commons Attribution ShareAlike (Unported) v3.0 License.
diff --git a/doc/configuration.rst b/doc/configuration.rst index ac6081899d9..0efe9573226 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -1062,6 +1062,30 @@ Only used with the Google storage backend. .. todo:: Expand +DataPusher Settings +------------------- + +.. _datapusher.formats: + +datapusher.formats +^^^^^^^^^^^^^^^^^^ + +Example:: + datapusher.formats = csv xls xlsx + +.. todo:: Expand + +.. _datapusher.url: + +datapusher.url +^^^^^^^^^^^^^^ + +Example:: + datapusher.url = http://datapusher.ckan.org/ + +.. todo:: Expand + + Activity Streams Settings ------------------------- diff --git a/doc/data-viewer.rst b/doc/data-viewer.rst index b97d24c8b1b..50f9169ec22 100644 --- a/doc/data-viewer.rst +++ b/doc/data-viewer.rst @@ -19,12 +19,12 @@ use a custom widget. The data preview functionality that is provided by CKAN is described in the following sections: -* `Viewing images and text files`_ -* `Viewing structured data: the Data Explorer`_ -* `Viewing highlighted XML, JSON and plain text data`_ -* `Viewing PDF documents`_ -* `Viewing remote resources: the resource proxy`_ -* `Embedding Previews In Other Web Pages`_ +* :ref:`image-preview` +* :ref:`data-explorer` +* :ref:`text-preview` +* :ref:`pdf-preview` +* :ref:`resource-proxy` +* :ref:`embed-previews` These sections list the resource formats that each extension can preview and provide instructions for how to enable each extension. @@ -34,6 +34,8 @@ For more information on this topic see `Writing Extensions `_. +.. _image-preview: + Viewing images and text files ----------------------------- @@ -78,6 +80,8 @@ way in which they are shown may vary. If you want to ensure for instance that XML based documents are correctly previewed, have a look at `Viewing highlighted XML, JSON and plain text data`_. +.. _data-explorer: + Viewing structured data: the Data Explorer ------------------------------------------ @@ -113,6 +117,8 @@ Or: reliable than viewing data that is in the DataStore. +.. 
_text-preview: + Viewing highlighted XML, JSON and plain text data ------------------------------------------------- @@ -138,13 +144,15 @@ set to one of the resource formats from above (case insensitive). .. seealso:: - :ref:`The resourceproxy extension ` + :ref:`The resourceproxy extension ` If you want to preview linked-to text files (and not only files that have been uploaded to CKAN) you need to enable the ``resource_proxy`` extension as well. +.. _pdf-preview: + Viewing PDF documents --------------------- @@ -160,14 +168,14 @@ have been added to a CKAN instance. This extension uses Mozilla's `pdf.js .. seealso:: - :ref:`The resourceproxy extension ` + :ref:`The resourceproxy extension ` If you want to preview linked-to PDF files (and not only files that have been uploaded to CKAN) you need to enable the ``resource_proxy`` extension as well. -.. _resource_proxy: +.. _resource-proxy: Viewing remote resources: the resource proxy -------------------------------------------- @@ -195,6 +203,7 @@ To find out whether the resource proxy is enabled, check ``ckan.resource_proxy_e from the config. You can find a complete example in the `CKAN source `_. +.. _embed-previews: Embedding Previews In Other Web Pages ------------------------------------- diff --git a/doc/datastore.rst b/doc/datastore.rst index d629146202c..a0a38b3be3c 100644 --- a/doc/datastore.rst +++ b/doc/datastore.rst @@ -2,19 +2,18 @@ DataStore Extension =================== -.. todo:: - What features does the datastore actually provide that users care about? - Why would they want to use it? +The CKAN DataStore extension provides an *ad hoc* database for storage of structured data from +CKAN resources. Data can be pulled out of resource files and stored in +the DataStore. 
- - API for reading, writing data without downloading, uploading entire file - - Enables Recline previews - - API for searching data, including search across resources +When a resource is added to the DataStore, you get: -The CKAN DataStore provides a database for structured storage of data together -with a powerful Web-accessible Data API, all seamlessly integrated into the CKAN -interface and authorization system. At the same time, we kept the layer between the -underlying database and the user as thin as possible. +* Automatic data previews on the resource's page, using the :ref:`Data Explorer extension ` +* The `The DataStore API`_: search, filter and update the data, without having to download + and upload the entire data file + +The DataStore is integrated into the :doc:`CKAN API ` and authorization system. .. contents:: :depth: 1 @@ -45,7 +44,7 @@ Setting up the DataStore DataStore on versions prior to 9.0 (for example 8.4). However, the :meth:`~ckanext.datastore.logic.action.datastore_search_sql` will not be available and the set-up is slightly different. Make sure, you read - :ref:`legacy_mode` for more details. + :ref:`legacy-mode` for more details. .. warning:: @@ -63,7 +62,7 @@ Add the ``datastore`` plugin to your CKAN config file:: .. warning:: Make sure that you follow the steps in `Set Permissions`_ below correctly. Wrong settings could lead to serious security issues. -The DataStore requires a separate PostgreSQL database to save the resources to. +The DataStore requires a separate PostgreSQL database to save the DataStore resources to. List existing databases:: @@ -118,7 +117,7 @@ Replace ``pass`` with the passwords you created for your |database_user| and Set Permissions --------------- -.. tip:: See :ref:`legacy_mode` if these steps continue to fail or seem too complicated for your set-up. However, keep in mind that the legacy mode is limited in its capabilities. +.. 
tip:: See :ref:`legacy-mode` if these steps continue to fail or seem too complicated for your set-up. However, keep in mind that the legacy mode is limited in its capabilities. Once the DataStore database and the users are created, the permissions on the DataStore and CKAN database have to be set. Since there are different set-ups, there are different ways of setting the permissions. Only **one** of the options should be used. @@ -170,19 +169,19 @@ Copy the ``set_permissions.sql`` file to the server that the database runs on. M ================== The DataStore is now set-up. To test the set-up, (re)start CKAN and run the -following command to list all resources that are in the DataStore:: +following command to list all DataStore resources:: curl -X GET "http://127.0.0.1:5000/api/3/action/datastore_search?resource_id=_table_metadata" This should return a JSON page without errors. -To test the whether the set-up allows writing, you can create a new resource in -the DataStore. To do so, run the following command:: +To test whether the set-up allows writing, you can create a new DataStore resource. +To do so, run the following command:: curl -X POST http://127.0.0.1:5000/api/3/action/datastore_create -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}", "fields": [ {"id": "a"}, {"id": "b"} ], "records": [ { "a": 1, "b": "xyz"}, {"a": 2, "b": "zzz"} ]}' -Replace ``{YOUR-API-KEY}`` with a valid API key and ``{RESOURCE-ID}`` with a -resource id of an existing CKAN resource. +Replace ``{YOUR-API-KEY}`` with a valid API key and ``{RESOURCE-ID}`` with the +id of an existing CKAN resource. A table named after the resource id should have been created on your DataStore database.
Visiting this URL should return a response from the DataStore with @@ -192,12 +191,12 @@ the records inserted above:: You can now delete the DataStore table with:: - curl -X POST http://127.0.0.1:5000/api/3/action/datastore_delete -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}"}' + curl -X POST http://127.0.0.1:5000/api/3/action/datastore_delete -H "Authorization: {YOUR-API-KEY}" -d '{"resource_id": "{RESOURCE-ID}"}' To find out more about the DataStore API, see `The DataStore API`_. -.. _legacy_mode: +.. _legacy-mode: Legacy mode: use the DataStore with old PostgreSQL versions =========================================================== @@ -224,9 +223,24 @@ There is no need for a read-only user or special permissions. Therefore the lega The DataStore API ----------------- -The DataStore API allows tabular data to be stored inside CKAN quickly and -easily. Each resource in a CKAN instance can have an associated DataStore -table. The API for using the DataStore is outlined below. +The CKAN DataStore offers an API for reading, searching and filtering data without +the need to download the entire file first. The DataStore is an ad hoc database which +means that it is a collection of tables with unknown relationships. This allows +you to search in one DataStore resource (a *table* in the database) as well as queries +across DataStore resources. + +Data can be written incrementally to the DataStore through the API. New data can be +inserted, existing data can be updated or deleted. You can also add a new column to +an existing table even if the DataStore resource already contains some data. + +You will notice that we tried to keep the layer between the underlying PostgreSQL +database and the API as thin as possible to allow you to use the features you would +expect from a powerful database management system. + +A DataStore resource can not be created on its own. It is always required to have an +associated CKAN resource. 
If data is stored in the DataStore, it will automatically be +previewed by the :ref:`recline preview extension `. + Making a DataStore API Request ============================== @@ -289,7 +303,7 @@ Example:: Records ------- -A record is the data to be inserted in a table and is defined as follows:: +A record is the data to be inserted in a DataStore resource and is defined as follows:: { "": # data to be set @@ -343,7 +357,7 @@ You can find more information about the formatting of dates in the `date/time ty .. _date/time types section of the PostgreSQL documentation: http://www.postgresql.org/docs/9.1/static/datatype-datetime.html -.. _resource_aliases: +.. _resource-aliases: Resource aliases ---------------- @@ -383,7 +397,7 @@ Internal structure of the database The DataStore is a thin layer on top of a PostgreSQL database. Each DataStore resource belongs to a CKAN resource. The name of a table in the DataStore is always the resource id of the CKAN resource for the data. -As explained in :ref:`resource_aliases`, a resource can have mnemonic aliases which are stored as views in the database. +As explained in :ref:`resource-aliases`, a resource can have mnemonic aliases which are stored as views in the database. All aliases (views) and resources (tables respectively relations) of the DataStore can be found in a special view called ``_table_metadata``. To access the list, open ``http://{YOUR-CKAN-INSTALLATION}/api/3/action/datastore_search?resource_id=_table_metadata``. diff --git a/doc/documentation-guidelines.rst b/doc/documentation-guidelines.rst index 809292e38b6..10a3050e1cf 100644 --- a/doc/documentation-guidelines.rst +++ b/doc/documentation-guidelines.rst @@ -386,6 +386,9 @@ or to define a URL once and then link to it in multiple places, do:: see `Hyperlinks `_ for details. +Use ``:py:`` to reference other Python or JavaScript functions, modules, +classes, etc. See :ref:`Referencing other code objects`. + .. 
_sphinx substitutions: diff --git a/doc/python-coding-standards.rst b/doc/python-coding-standards.rst index 8e4f590bfae..ed396398f38 100644 --- a/doc/python-coding-standards.rst +++ b/doc/python-coding-standards.rst @@ -1,5 +1,5 @@ ======================= -Python Coding Standards +Python coding standards ======================= For Python code style follow `PEP 8`_ plus the guidelines below. @@ -12,15 +12,7 @@ Some good links about Python code style: - `Google Python Style Guide `_ -Commit Formatting Cleanups on master ------------------------------------- - -Clean up formatting and PEP 8 issues on master, not on a feature branch. -Unless of course you're changing that piece of code anyway. This will help -avoid spurious merge conflicts, and aid in reading pull requests. - - -Use Single Quotes +Use single quotes ----------------- Use single-quotes for string literals, e.g. ``'my-identifier'``, *but* use @@ -61,6 +53,17 @@ Imports Logging ------- +We use `the Python standard library's logging module `_ +to log messages in CKAN, e.g.:: + + import logging + ... + logger = logging.getLogger(__name__) + ... + logger.debug('some debug message') + +When logging: + - Keep log messages short. - Don't include object representations in the log message. It *is* useful @@ -71,10 +74,10 @@ Logging .. _Python's Logging HOWTO: http://docs.python.org/2/howto/logging.html -String Formatting +String formatting ------------------ -Don't use the old `%s` style string formatting, e.g. ``"i am a %s" % sub``. +Don't use the old ``%s`` style string formatting, e.g. ``"i am a %s" % sub``. This kind of string formatting is not helpful for internationalization and is going away in Python 3. @@ -100,10 +103,11 @@ as it changes over time. So: - All modules and all public functions, classes and methods exported by a module should normally have docstrings (see `PEP 257`_). 
-- Keep docstrings short, describe only what's necessary and no more, +- Keep docstrings short, describe only what's necessary and no more. - Keep docstrings simple: use plain, concise English. - Try to avoid repetition. + PEP 257 (Docstring Conventions) ``````````````````````````````` @@ -115,10 +119,130 @@ CKAN docstrings deviate from PEP 257 in a couple of ways: - We use ``'''triple single quotes'''`` around docstrings, not ``"""triple double quotes"""`` (put triple single quotes around one-line docstrings as well as multi-line ones, it makes them easier to expand later) +- We use Sphinx domain object cross-references to cross-reference to other + code objects (see below) - We use Sphinx directives for documenting parameters, exceptions and return values (see below) -Sphinx Field Lists + +.. _Referencing other code objects: + +Referencing other code objects with ``:py:`` +-------------------------------------------- + +If you want to refer to another Python or JavaScript module, function or class +etc. in a docstring (or from a ``.rst`` file), use `Sphinx domain object +cross-references +`_, for +example:: + + See :py:mod:`ckan.lib.helpers`. + + See :py:func:`ckan.logic.action.create.package_create`. + + See :py:class:`ckan.logic.NotFound`. + +For the full list of types of cross-reference, see the +`Sphinx docs `_. + + +.. note:: + + These kinds of cross-references can also be used to reference other types + of object besides Python objects, for example `JavaScript objects `_ + or even command-line scripts and options and environment variables. See + `the Sphinx docs `_ for the full + details. + + +Cross-referencing objects like this means that Sphinx will style the reference +with the right CSS, and hyperlink the reference to the docs for the referenced +object. Sphinx can also generate error messages when non-existent objects are +referenced, which helps to keep the docs up to date as the code changes. + +.. 
tip:: + + Sphinx will render a cross-reference like + ``:py:func:`ckan.logic.action.create.package_create``` as the full name of + the function: :py:func:`ckan.logic.action.create.package_create`. If you want the + docs to contain only the local name of the function (e.g. just + :py:func:`~ckan.logic.action.create.package_create`), put a ``~`` at the + start:: + + :py:func:`~ckan.logic.action.create.package_create` + + (But you should always use the fully qualified name in your docstring or + ``*.rst`` file.) + + +Documenting exceptions raised with ``:raises`` +`````````````````````````````````````````````` + +There are a few guidelines that CKAN code should follow regarding exceptions: + +1. **All public functions that CKAN exports for third-party code to use + should document any exceptions they raise**. See below for how to document + exceptions raised. + + For example the template helper functions in :py:mod:`ckan.lib.helpers`, + anything imported into :py:mod:`ckan.plugins.toolkit`, and all of the + action API functions defined in :py:mod:`ckan.logic.action`, should list + exceptions raised in their docstrings. + + This is because CKAN themes, extensions and API clients need to be able to + call CKAN code without crashing, so they need to know what exceptions they + should handle (and extension developers shouldn't have to understand the + CKAN core source code). + +2. On the other hand, **internal functions that are only used within CKAN + shouldn't list exceptions in their docstrings**. + + This is because it would be difficult to keep all the exception lists up to + date with the actual code behaviour, so the docstrings would become more + misleading than useful. + +3. **Code should only raise exceptions from within its allowed set**. + + Each module in CKAN has a set of zero or more exceptions, defined somewhere + near the module, that code in that module is allowed to raise. 
For example + ``ckan/logic/__init__.py`` defines a number of exception types for code + in ``ckan/logic/`` to use. CKAN code should never raise exception types + defined elsewhere in CKAN, in third-party code or in the Python standard + library. + +4. **All code should catch any exceptions raised by called functions**, and + either handle the exception, re-raise the exception (if it's from the code's + set of allowed exception types), or wrap the exception in an allowed + exception type and re-raise it. + + This is to make it easy for a CKAN core developer to look at the source code + of an internal function, scan it for the keyword ``raise``, and see what + types of exception the function may raise, so they know what exceptions they + need to catch if they're going to call the function. Developers shouldn't + have to read the source of all the functions that a function calls (and + the functions they call...) to find out what exceptions they need to catch + to call a function without crashing. + +.. todo:: + + Insert examples of how to re-raise and how to wrap-and-re-raise an + exception. + +Use ``:raises:`` to document exceptions raised by public functions. The +docstring should say what type of exception is raised and under what +conditions. Use ``:py:class:`` to reference exception types. For example:: + + def member_list(context, data_dict=None): + '''Return the members of a group. + + ... (parameters and return values documented here) ... + + :raises: :py:class:`ckan.logic.NotFound`: if the group doesn't exist + + ''' + + +Sphinx field lists `````````````````` Use `Sphinx field lists`_ for documenting the parameters, exceptions and @@ -163,7 +287,6 @@ Example of a longer docstring: ''' - The phrases that follow ``:param foo:``, ``:type foo:``, or ``:returns:`` should not start with capital letters or end with full stops. These should be short phrases and not full sentences.
If more detail is required put it in the @@ -182,7 +305,7 @@ You can also use a little inline `reStructuredText markup`_ in docstrings, e.g. .. _Action API Docstrings: -Action API Docstrings +Action API docstrings ````````````````````` Docstrings from CKAN's action API are processed with `autodoc`_ and @@ -231,7 +354,7 @@ Example of a ckan.logic.action API docstring: .. _Autodoc: http://sphinx.pocoo.org/ext/autodoc.html -Some Helpful Tools for Python Code Quality +Some helpful tools for Python code quality ------------------------------------------ There are various tools that can help you to check your Python code for PEP8 diff --git a/pip-requirements-docs.txt b/pip-requirements-docs.txt new file mode 100644 index 00000000000..e5f043cbb7a --- /dev/null +++ b/pip-requirements-docs.txt @@ -0,0 +1,4 @@ +# This will install the requirements used to build the docs + +-r requirements.txt +-r dev-requirements.txt