diff --git a/bin/travis-install-dependencies b/bin/travis-install-dependencies
index 8b9b1430d8f..77514786c2e 100755
--- a/bin/travis-install-dependencies
+++ b/bin/travis-install-dependencies
@@ -29,6 +29,7 @@ sudo -E -u postgres ./bin/postgres_init/1_create_ckan_db.sh
 sudo -E -u postgres ./bin/postgres_init/2_create_ckan_datastore_db.sh
 
 export PIP_USE_MIRRORS=true
+pip install -r requirement-setuptools.txt --allow-all-external
 pip install -r requirements.txt --allow-all-external
 pip install -r dev-requirements.txt --allow-all-external
 
diff --git a/circle.yml b/circle.yml
index d9fff638dea..e560a8057a0 100644
--- a/circle.yml
+++ b/circle.yml
@@ -23,6 +23,7 @@ dependencies:
       && chmod +x ~/.local/bin/circleci-matrix"
 
   override:
+    - pip install -r requirement-setuptools.txt
     - pip install -r requirements.txt
     - pip install -r dev-requirements.txt
     - python setup.py develop
diff --git a/ckan/controllers/group.py b/ckan/controllers/group.py
index e711fea8561..40b7ef2b5b3 100644
--- a/ckan/controllers/group.py
+++ b/ckan/controllers/group.py
@@ -382,13 +382,16 @@ def bulk_process(self, id):
         data_dict = {'id': id, 'type': group_type}
 
         try:
+            self._check_access('bulk_update_public', context, {'org_id': id})
             # Do not query for the group datasets when dictizing, as they will
             # be ignored and get requested on the controller anyway
             data_dict['include_datasets'] = False
             c.group_dict = self._action('group_show')(context, data_dict)
             c.group = context['group']
-        except (NotFound, NotAuthorized):
+        except NotFound:
             abort(404, _('Group not found'))
+        except NotAuthorized:
+            abort(403, _('User %r not authorized to edit %s') % (c.user, id))
 
         if not c.group_dict['is_organization']:
             # FIXME: better error
@@ -634,14 +637,21 @@ def members(self, id):
                    'user': c.user}
 
         try:
+            data_dict = {'id': id}
+            check_access('group_edit_permissions', context, data_dict)
             c.members = self._action('member_list')(
                 context, {'id': id, 'object_type': 'user'}
             )
-            data_dict = {'id': id}
             data_dict['include_datasets'] = False
             c.group_dict = self._action('group_show')(context, data_dict)
-        except (NotFound, NotAuthorized):
+        except NotFound:
             abort(404, _('Group not found'))
+        except NotAuthorized:
+            abort(
+                403,
+                _('User %r not authorized to edit members of %s') % (
+                    c.user, id))
+
         return self._render_template('group/members.html', group_type)
 
     def member_new(self, id):
diff --git a/ckan/lib/lazyjson.py b/ckan/lib/lazyjson.py
new file mode 100644
index 00000000000..05eb8be8a54
--- /dev/null
+++ b/ckan/lib/lazyjson.py
@@ -0,0 +1,55 @@
+# encoding: utf-8
+
+
+from simplejson import loads, RawJSON, dumps
+
+
+class LazyJSONObject(RawJSON):
+    u'''
+    An object that behaves like a dict returned from json.loads
+    but when passed to simplejson.dumps will render the original
+    string passed when possible. Accepts and produces only
+    unicode strings containing a single JSON object.
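+
+    A minimal usage sketch (illustrative only):
+
+        j = LazyJSONObject(u'{"a": 1}')
+        dumps(j)  # emits the original string verbatim
+        j[u'a']   # parses the string on first dict-style access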
+    '''
+    def __init__(self, json_string):
+        assert isinstance(json_string, unicode), json_string
+        self._json_string = json_string
+        self._json_dict = None
+
+    def _loads(self):
+        if not self._json_dict:
+            self._json_dict = loads(self._json_string)
+            self._json_string = None
+        return self._json_dict
+
+    def __nonzero__(self):
+        return True
+
+    def __repr__(self):
+        if self._json_string:
+            return u'<LazyJSONObject %r>' % self._json_string
+        return u'<LazyJSONObject %r>' % self._json_dict
+
+    @property
+    def encoded_json(self):
+        if self._json_string:
+            return self._json_string
+        return dumps(
+            self._json_dict,
+            ensure_ascii=False,
+            separators=(u',', u':'))
+
+
+def _loads_method(name):
+    def method(self, *args, **kwargs):
+        return getattr(self._loads(), name)(*args, **kwargs)
+    return method
+
+
+for fn in [u'__contains__', u'__delitem__', u'__eq__', u'__ge__',
+           u'__getitem__', u'__gt__', u'__iter__', u'__le__', u'__len__',
+           u'__lt__', u'__ne__', u'__setitem__', u'clear', u'copy',
+           u'fromkeys', u'get', u'has_key', u'items', u'iteritems',
+           u'iterkeys', u'itervalues', u'keys', u'pop', u'popitem',
+           u'setdefault', u'update', u'values']:
+    setattr(LazyJSONObject, fn, _loads_method(fn))
diff --git a/ckan/lib/navl/validators.py b/ckan/lib/navl/validators.py
index 6e508c846a8..cfcb1a2d5d2 100644
--- a/ckan/lib/navl/validators.py
+++ b/ckan/lib/navl/validators.py
@@ -117,3 +117,9 @@ def convert_int(value, context):
     except ValueError:
         raise Invalid(_('Please enter an integer value'))
 
+def unicode_only(value):
+    '''Accept only unicode values'''
+
+    if not isinstance(value, unicode):
+        raise Invalid(_('Must be a Unicode string value'))
+    return value
diff --git a/ckan/logic/auth/update.py b/ckan/logic/auth/update.py
index 3d0e86c25cb..ae75f8cedb8 100644
--- a/ckan/logic/auth/update.py
+++ b/ckan/logic/auth/update.py
@@ -153,14 +153,15 @@ def group_edit_permissions(context, data_dict):
     user = context['user']
     group = logic_auth.get_group_object(context, data_dict)
 
-    authorized = authz.has_user_permission_for_group_or_org(group.id,
-                                                            user,
-                                                            'update')
+    authorized = authz.has_user_permission_for_group_or_org(
+        group.id, user, 'update')
 
     if not authorized:
-        return {'success': False,
-                'msg': _('User %s not authorized to edit permissions of group %s') %
-                (str(user), group.id)}
+        return {
+            'success': False,
+            'msg': _('User %s not authorized to'
+                     ' edit permissions of group %s') %
+            (str(user), group.id)}
     else:
         return {'success': True}
 
diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py
index 4a6eff941d3..fe8e05c38fc 100644
--- a/ckan/logic/schema.py
+++ b/ckan/logic/schema.py
@@ -70,6 +70,7 @@
     extra_key_not_in_root_schema,
     empty_if_not_sysadmin,
     package_id_does_not_exist,
+    email_validator
 )
 
 
@@ -146,9 +147,9 @@ def default_create_package_schema():
         'name': [not_empty, unicode, name_validator, package_name_validator],
         'title': [if_empty_same_as("name"), unicode],
         'author': [ignore_missing, unicode],
-        'author_email': [ignore_missing, unicode],
+        'author_email': [ignore_missing, unicode, email_validator],
         'maintainer': [ignore_missing, unicode],
-        'maintainer_email': [ignore_missing, unicode],
+        'maintainer_email': [ignore_missing, unicode, email_validator],
         'license_id': [ignore_missing, unicode],
         'notes': [ignore_missing, unicode],
         'url': [ignore_missing, unicode],  # , URL(add_http=False)],
diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py
index 5173b909167..1f751e5fe88 100644
--- a/ckan/logic/validators.py
+++ b/ckan/logic/validators.py
@@ -831,3 +831,17 @@ def empty_if_not_sysadmin(key, data, errors, context):
     return empty(key, data, errors, context)
+
+#pattern from https://html.spec.whatwg.org/#e-mail-state-(type=email)
+email_pattern = re.compile(r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"\
+                           "(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]"\
+                           "(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
+
+
+def email_validator(value, context):
+    '''Validate email input '''
+
+    if value:
+        if not email_pattern.match(value):
+            raise Invalid(_('Email {email} is not a valid format').format(email=value))
+    return value
diff --git a/ckan/public/base/javascript/modules/resource-view-filters.js b/ckan/public/base/javascript/modules/resource-view-filters.js
index 0a108acffbd..787e9443e41 100644
--- a/ckan/public/base/javascript/modules/resource-view-filters.js
+++ b/ckan/public/base/javascript/modules/resource-view-filters.js
@@ -6,7 +6,7 @@ this.ckan.module('resource-view-filters', function (jQuery) {
         resourceId = self.options.resourceId,
         fields = self.options.fields,
         dropdownTemplate = self.options.dropdownTemplate,
-        addFilterTemplate = '<a href="#">' + self._('Add Filter') + '</a>';
+        addFilterTemplate = '<a href="#">' + self._('Add Filter') + '</a>',
+        filtersDiv = $('<div></div>');
 
     var filters = ckan.views.filters.get();
diff --git a/ckan/public/base/less/forms.less b/ckan/public/base/less/forms.less
index 2800dde3e98..a024a6f51fb 100644
--- a/ckan/public/base/less/forms.less
+++ b/ckan/public/base/less/forms.less
@@ -170,7 +170,7 @@ textarea {
 
 @media (min-width: 980px) {
   .form-horizontal .info-block {
-    padding: 6px 0 6px 25px;
+    padding: 0 0 6px 25px;
   }
   .form-horizontal .info-inline {
     float: right;
diff --git a/ckan/public/base/less/media.less b/ckan/public/base/less/media.less
index 900af8dd028..c4b9abbefb0 100644
--- a/ckan/public/base/less/media.less
+++ b/ckan/public/base/less/media.less
@@ -28,6 +28,11 @@
   .media-grid {
     margin-left:-27px;
   }
+  .module-content {
+    .wide .media-grid {
+      margin-left:-25px;
+    }
+  }
 }
 
 .media-item {
diff --git a/ckan/templates/package/resource_read.html b/ckan/templates/package/resource_read.html
index be0bba84aa6..a7ffbc0b3b8 100644
--- a/ckan/templates/package/resource_read.html
+++ b/ckan/templates/package/resource_read.html
@@ -37,7 +37,7 @@
               {{ _('View') }}
             {% elif res.resource_type == 'api' %}
               {{ _('API Endpoint') }}
-            {% elif not res.has_views or not res.can_be_previewed %}
+            {% elif (not res.has_views or not res.can_be_previewed) and not res.url_type == 'upload' %}
               {{ _('Go to resource') }}
             {% else %}
               {{ _('Download') }}
diff --git a/ckan/templates/package/snippets/resource_item.html b/ckan/templates/package/snippets/resource_item.html
index 8e437da6f7d..992397e10f4 100644
--- a/ckan/templates/package/snippets/resource_item.html
+++ b/ckan/templates/package/snippets/resource_item.html
@@ -39,7 +39,7 @@
     {% if res.url and h.is_url(res.url) %}
  • - {% if res.has_views %} + {% if res.has_views or res.url_type == 'upload' %} {{ _('Download') }} {% else %} diff --git a/ckan/tests/controllers/test_group.py b/ckan/tests/controllers/test_group.py index 67e06c92ce9..a0fdc00c13f 100644 --- a/ckan/tests/controllers/test_group.py +++ b/ckan/tests/controllers/test_group.py @@ -282,9 +282,12 @@ def test_membership_list(self): group = self._create_group(user_one['name'], other_users) + env = {'REMOTE_USER': user_one['name'].encode('ascii')} + member_list_url = url_for(controller='group', action='members', id=group['id']) - member_list_response = app.get(member_list_url) + member_list_response = app.get( + member_list_url, extra_environ=env) assert_true('2 members' in member_list_response) @@ -375,7 +378,7 @@ def test_remove_member(self): env = {'REMOTE_USER': user_one['name'].encode('ascii')} remove_response = app.post(remove_url, extra_environ=env, status=302) # redirected to member list after removal - remove_response = remove_response.follow() + remove_response = remove_response.follow(extra_environ=env) assert_true('Group member has been deleted.' in remove_response) assert_true('1 members' in remove_response) diff --git a/ckan/tests/helpers.py b/ckan/tests/helpers.py index 0d0e80a64db..49d099f9fc1 100644 --- a/ckan/tests/helpers.py +++ b/ckan/tests/helpers.py @@ -179,7 +179,8 @@ class FunctionalTestBase(object): Allows configuration changes by overriding _apply_config_changes and resetting the CKAN config after your test class has run. It creates a webtest.TestApp at self.app for your class to use to make HTTP requests - to the CKAN web UI or API. + to the CKAN web UI or API. Also loads plugins defined by + _load_plugins in the class definition. If you're overriding methods that this class provides, like setup_class() and teardown_class(), make sure to use super() to call this class's methods @@ -196,10 +197,13 @@ def _get_test_app(cls): # leading _ because nose is terrible @classmethod def setup_class(cls): + import ckan.plugins as p # Make a copy of the Pylons config, so we can restore it in teardown. cls._original_config = dict(config) cls._apply_config_changes(config) cls._get_test_app() + for plugin in getattr(cls, '_load_plugins', []): + p.load(plugin) @classmethod def _apply_config_changes(cls, cfg): @@ -214,6 +218,9 @@ def setup(self): @classmethod def teardown_class(cls): + import ckan.plugins as p + for plugin in reversed(getattr(cls, '_load_plugins', [])): + p.unload(plugin) # Restore the Pylons config to its original values, in case any tests # changed any config settings. 
config.clear() diff --git a/ckan/tests/legacy/functional/api/test_activity.py b/ckan/tests/legacy/functional/api/test_activity.py index 52a2514c1c2..e7a233b4724 100644 --- a/ckan/tests/legacy/functional/api/test_activity.py +++ b/ckan/tests/legacy/functional/api/test_activity.py @@ -139,9 +139,9 @@ def make_package(name=None): 'name': name, 'title': 'My Test Package', 'author': 'test author', - 'author_email': 'test_author@test_author.com', + 'author_email': 'test_author@testauthor.com', 'maintainer': 'test maintainer', - 'maintainer_email': 'test_maintainer@test_maintainer.com', + 'maintainer_email': 'test_maintainer@testmaintainer.com', 'notes': 'some test notes', 'url': 'www.example.com', } diff --git a/ckanext/datapusher/tests/test.py b/ckanext/datapusher/tests/test.py index d603ca386ab..cdf4024374d 100644 --- a/ckanext/datapusher/tests/test.py +++ b/ckanext/datapusher/tests/test.py @@ -68,8 +68,7 @@ def setup_class(cls): ctd.CreateTestData.create() cls.sysadmin_user = model.User.get('testsysadmin') cls.normal_user = model.User.get('annafan') - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) set_url_type( model.Package.get('annakarenina').resources, cls.sysadmin_user) diff --git a/ckanext/datapusher/tests/test_interfaces.py b/ckanext/datapusher/tests/test_interfaces.py index 49b8efd7fad..8439dbff45c 100644 --- a/ckanext/datapusher/tests/test_interfaces.py +++ b/ckanext/datapusher/tests/test_interfaces.py @@ -61,8 +61,7 @@ def setup_class(cls): cls.sysadmin_user = factories.User(name='testsysadmin', sysadmin=True) cls.normal_user = factories.User(name='annafan') - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod diff --git a/ckanext/datastore/controller.py b/ckanext/datastore/controller.py index 09f91d160ab..ffd346359be 100644 --- a/ckanext/datastore/controller.py +++ b/ckanext/datastore/controller.py @@ -28,7 +28,7 @@ boolean_validator = get_validator('boolean_validator') DUMP_FORMATS = 'csv', 'tsv', 'json', 'xml' -PAGINATE_BY = 10000 +PAGINATE_BY = 32000 class DatastoreController(BaseController): @@ -100,16 +100,22 @@ def dictionary(self, id, resource_id): def dump_to(resource_id, output, fmt, offset, limit, options): + if fmt == 'csv': + writer_factory = csv_writer + records_format = 'csv' + elif fmt == 'tsv': + writer_factory = tsv_writer + records_format = 'tsv' + elif fmt == 'json': + writer_factory = json_writer + records_format = 'lists' + elif fmt == 'xml': + writer_factory = xml_writer + records_format = 'objects' + def start_writer(fields): bom = options.get(u'bom', False) - if fmt == 'csv': - return csv_writer(output, fields, resource_id, bom) - if fmt == 'tsv': - return tsv_writer(output, fields, resource_id, bom) - if fmt == 'json': - return json_writer(output, fields, resource_id, bom) - if fmt == 'xml': - return xml_writer(output, fields, resource_id, bom) + return writer_factory(output, fields, resource_id, bom) def result_page(offs, lim): return get_action('datastore_search')(None, { @@ -118,6 +124,8 @@ def result_page(offs, lim): PAGINATE_BY if limit is None else min(PAGINATE_BY, lim), 'offset': offs, + 'records_format': records_format, + 'include_total': 'false', # XXX: default() is broken }) result = result_page(offset, limit) @@ -128,13 +136,20 @@ def result_page(offs, lim): if limit is not 
None and limit <= 0: break - for record in result['records']: - wr.writerow([record[column] for column in columns]) + records = result['records'] + + wr.write_records(records) - if len(result['records']) < PAGINATE_BY: + if records_format == 'objects' or records_format == 'lists': + if len(records) < PAGINATE_BY: + break + elif not records: break + offset += PAGINATE_BY if limit is not None: limit -= PAGINATE_BY + if limit <= 0: + break result = result_page(offset, limit) diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index ed01ce95bfd..f30d1ecbe92 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -11,10 +11,13 @@ import urllib import urllib2 import urlparse +from cStringIO import StringIO import ckan.lib.cli as cli import ckan.plugins as p import ckan.plugins.toolkit as toolkit +from ckan.lib.lazyjson import LazyJSONObject + import ckanext.datastore.helpers as datastore_helpers import ckanext.datastore.interfaces as interfaces import psycopg2.extras @@ -98,9 +101,16 @@ def _is_valid_table_name(name): return _is_valid_field_name(name) -def _get_engine(data_dict): +def get_read_engine(): + return _get_engine_from_url(config['ckan.datastore.read_url']) + + +def get_write_engine(): + return _get_engine_from_url(config['ckan.datastore.write_url']) + + +def _get_engine_from_url(connection_url): '''Get either read or write engine.''' - connection_url = data_dict['connection_url'] engine = _engines.get(connection_url) if not engine: @@ -127,9 +137,7 @@ def _cache_types(context): log.info("Create nested type. Native JSON: {0!r}".format( native_json)) - data_dict = { - 'connection_url': config['ckan.datastore.write_url']} - engine = _get_engine(data_dict) + engine = get_write_engine() with engine.begin() as connection: connection.execute( 'CREATE TYPE "nested" AS (json {0}, extra text)'.format( @@ -714,6 +722,9 @@ def upsert_data(context, data_dict): toolkit._("The data was invalid (for example: a numeric value " "is out of range or was inserted into a text field)." 
)) + except sqlalchemy.exc.DatabaseError as err: + raise ValidationError( + {u'records': [_programming_error_summary(err)]}) elif method in [_UPDATE, _UPSERT]: unique_keys = _get_unique_key(context, data_dict) @@ -774,8 +785,13 @@ def upsert_data(context, data_dict): [u'"{0}"'.format(part) for part in unique_keys]), primary_value=u','.join(["%s"] * len(unique_keys)) ) - results = context['connection'].execute( - sql_string, used_values + [full_text] + unique_values) + try: + results = context['connection'].execute( + sql_string, used_values + [full_text] + unique_values) + except sqlalchemy.exc.DatabaseError as err: + raise ValidationError({ + u'records': [_programming_error_summary(err)], + u'_records_row': num}) # validate that exactly one row has been updated if results.rowcount != 1: @@ -803,9 +819,14 @@ def upsert_data(context, data_dict): for part in unique_keys]), primary_value=u','.join(["%s"] * len(unique_keys)) ) - context['connection'].execute( - sql_string, - (used_values + [full_text] + unique_values) * 2) + try: + context['connection'].execute( + sql_string, + (used_values + [full_text] + unique_values) * 2) + except sqlalchemy.exc.DatabaseError as err: + raise ValidationError({ + u'records': [_programming_error_summary(err)], + u'_records_row': num}) def _get_unique_key(context, data_dict): @@ -967,10 +988,10 @@ def validate(context, data_dict): fields_types) # Remove default elements in data_dict - del data_dict_copy['connection_url'] del data_dict_copy['resource_id'] data_dict_copy.pop('id', None) data_dict_copy.pop('include_total', None) + data_dict_copy.pop('records_format', None) for key, values in data_dict_copy.iteritems(): if not values: @@ -1025,9 +1046,41 @@ def search_data(context, data_dict): else: sort_clause = '' - sql_string = u'''SELECT {distinct} {select} - FROM "{resource}" {ts_query} - {where} {sort} LIMIT {limit} OFFSET {offset}'''.format( + records_format = data_dict['records_format'] + if records_format == u'objects': + sql_fmt = u''' + SELECT array_to_json(array_agg(j))::text FROM ( + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) AS j''' + elif records_format == u'lists': + select_columns = u" || ',' || ".join( + s for s in query_dict['select'] + ).replace('%', '%%') + sql_fmt = u''' + SELECT '[' || array_to_string(array_agg(j.v), ',') || ']' FROM ( + SELECT '[' || {select} || ']' v + FROM ( + SELECT {distinct} * FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset}) as z + ) AS j''' + elif records_format == u'csv': + sql_fmt = u''' + COPY ( + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) TO STDOUT csv DELIMITER ',' ''' + elif records_format == u'tsv': + sql_fmt = u''' + COPY ( + SELECT {distinct} {select} + FROM "{resource}" {ts_query} + {where} {sort} LIMIT {limit} OFFSET {offset} + ) TO STDOUT csv DELIMITER '\t' ''' + + sql_string = sql_fmt.format( distinct=distinct, select=select_columns, resource=resource_id, @@ -1037,11 +1090,32 @@ def search_data(context, data_dict): limit=limit, offset=offset) - results = _execute_single_statement(context, sql_string, where_values) + if records_format == u'csv' or records_format == u'tsv': + buf = StringIO() + _execute_single_statement_copy_to( + context, sql_string, where_values, buf) + records = buf.getvalue() + else: + v = list(_execute_single_statement( + context, sql_string, where_values))[0][0] + if v is None: + records = [] + else: + records = 
LazyJSONObject(v)
+    data_dict['records'] = records
+
+    field_info = _get_field_info(
+        context['connection'], data_dict['resource_id'])
+    result_fields = []
+    for field_id, field_type in fields_types.iteritems():
+        f = {u'id': field_id, u'type': field_type}
+        if field_id in field_info:
+            f['info'] = field_info[f['id']]
+        result_fields.append(f)
+    data_dict['fields'] = result_fields
+
+    _unrename_json_field(data_dict)
     _insert_links(data_dict, limit, offset)
-    r = format_results(context, results, data_dict, _get_field_info(
-        context['connection'], data_dict['resource_id']))
 
     if data_dict.get('include_total', True):
         count_sql_string = u'''SELECT {distinct} count(*)
@@ -1054,7 +1128,7 @@ def search_data(context, data_dict):
             context, count_sql_string, where_values)
         data_dict['total'] = count_result.fetchall()[0][0]
 
-    return r
+    return data_dict
 
 
 def _execute_single_statement(context, sql_string, where_values):
@@ -1068,16 +1142,24 @@ def _execute_single_statement(context, sql_string, where_values):
     return results
 
 
-def format_results(context, results, data_dict, field_info=None):
+def _execute_single_statement_copy_to(context, sql_string, where_values, buf):
+    if not datastore_helpers.is_single_statement(sql_string):
+        raise ValidationError({
+            'query': ['Query is not a single statement.']
+        })
+
+    cursor = context['connection'].connection.cursor()
+    cursor.copy_expert(cursor.mogrify(sql_string, where_values), buf)
+    cursor.close()
+
+
+def format_results(context, results, data_dict):
     result_fields = []
     for field in results.cursor.description:
-        f = {
+        result_fields.append({
             'id': field[0].decode('utf-8'),
             'type': _get_type(context, field[1])
-        }
-        if field_info and f['id'] in field_info:
-            f['info'] = field_info[f['id']]
-        result_fields.append(f)
+        })
 
     records = []
     for row in results:
@@ -1116,7 +1198,7 @@ def create(context, data_dict):
     :raises InvalidDataError: if there is an invalid value in the given
         data
 
     '''
-    engine = _get_engine(data_dict)
+    engine = get_write_engine()
     context['connection'] = engine.connect()
     timeout = context.get('query_timeout', _TIMEOUT)
     _cache_types(context)
@@ -1136,6 +1218,11 @@ def create(context, data_dict):
             create_table(context, data_dict)
         else:
             alter_table(context, data_dict)
+        if 'triggers' in data_dict:
+            _create_triggers(
+                context['connection'],
+                data_dict['resource_id'],
+                data_dict['triggers'])
         insert_data(context, data_dict)
         create_indexes(context, data_dict)
         create_alias(context, data_dict)
@@ -1173,6 +1260,37 @@ def create(context, data_dict):
         context['connection'].close()
 
 
+def _create_triggers(connection, resource_id, triggers):
+    u'''
+    Delete existing triggers on table then create triggers
+
+    Currently our schema requires "before insert or update"
+    triggers run on each row, so we're not reading "when"
+    or "for_each" parameters from triggers list.
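+
+    Example ``triggers`` value (illustrative; ``my_trigger`` stands in
+    for a trigger function created with datastore_function_create):
+
+        [{u'function': u'my_trigger'}]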
+ ''' + existing = connection.execute( + u'SELECT tgname FROM pg_trigger WHERE tgrelid = %s::regclass', + resource_id) + sql_list = ( + [u'DROP TRIGGER {name} ON {table}'.format( + name=datastore_helpers.identifier(r[0]), + table=datastore_helpers.identifier(resource_id)) + for r in existing] + + [u'''CREATE TRIGGER {name} + BEFORE INSERT OR UPDATE ON {table} + FOR EACH ROW EXECUTE PROCEDURE {function}()'''.format( + # 1000 triggers per table should be plenty + name=datastore_helpers.identifier(u't%03d' % i), + table=datastore_helpers.identifier(resource_id), + function=datastore_helpers.identifier(t['function'])) + for i, t in enumerate(triggers)]) + try: + if sql_list: + connection.execute(u';\n'.join(sql_list)) + except ProgrammingError as pe: + raise ValidationError({u'triggers': [_programming_error_summary(pe)]}) + + def upsert(context, data_dict): ''' This method combines upsert insert and update on the datastore. The method @@ -1181,7 +1299,7 @@ def upsert(context, data_dict): Any error results in total failure! For now pass back the actual error. Should be transactional. ''' - engine = _get_engine(data_dict) + engine = get_write_engine() context['connection'] = engine.connect() timeout = context.get('query_timeout', _TIMEOUT) @@ -1224,7 +1342,7 @@ def upsert(context, data_dict): def delete(context, data_dict): - engine = _get_engine(data_dict) + engine = get_write_engine() context['connection'] = engine.connect() _cache_types(context) @@ -1248,7 +1366,7 @@ def delete(context, data_dict): def search(context, data_dict): - engine = _get_engine(data_dict) + engine = get_read_engine() context['connection'] = engine.connect() timeout = context.get('query_timeout', _TIMEOUT) _cache_types(context) @@ -1275,7 +1393,7 @@ def search(context, data_dict): def search_sql(context, data_dict): - engine = _get_engine(data_dict) + engine = get_read_engine() context['connection'] = engine.connect() timeout = context.get('query_timeout', _TIMEOUT) _cache_types(context) @@ -1367,7 +1485,7 @@ def _change_privilege(context, data_dict, what): def make_private(context, data_dict): log.info('Making resource {resource_id!r} private'.format(**data_dict)) - engine = _get_engine(data_dict) + engine = get_write_engine() context['connection'] = engine.connect() trans = context['connection'].begin() try: @@ -1379,7 +1497,7 @@ def make_private(context, data_dict): def make_public(context, data_dict): log.info('Making resource {resource_id!r} public'.format(**data_dict)) - engine = _get_engine(data_dict) + engine = get_write_engine() context['connection'] = engine.connect() trans = context['connection'].begin() try: @@ -1390,12 +1508,70 @@ def make_public(context, data_dict): def get_all_resources_ids_in_datastore(): - read_url = config.get('ckan.datastore.read_url') - write_url = config.get('ckan.datastore.write_url') - data_dict = { - 'connection_url': read_url or write_url - } resources_sql = sqlalchemy.text(u'''SELECT name FROM "_table_metadata" WHERE alias_of IS NULL''') - query = _get_engine(data_dict).execute(resources_sql) + query = get_read_engine().execute(resources_sql) return [q[0] for q in query.fetchall()] + + +def create_function(name, arguments, rettype, definition, or_replace): + sql = u''' + CREATE {or_replace} FUNCTION + {name}({args}) RETURNS {rettype} AS {definition} + LANGUAGE plpgsql;'''.format( + or_replace=u'OR REPLACE' if or_replace else u'', + name=datastore_helpers.identifier(name), + args=u', '.join( + u'{argname} {argtype}'.format( + argname=datastore_helpers.identifier(a['argname']), 
+ argtype=datastore_helpers.identifier(a['argtype'])) + for a in arguments), + rettype=datastore_helpers.identifier(rettype), + definition=datastore_helpers.literal_string(definition)) + + try: + _write_engine_execute(sql) + except ProgrammingError as pe: + key = ( + u'name' if pe.args[0].startswith('(ProgrammingError) function') + else u'definition') + raise ValidationError({key: [_programming_error_summary(pe)]}) + + +def drop_function(name, if_exists): + sql = u''' + DROP FUNCTION {if_exists} {name}(); + '''.format( + if_exists=u'IF EXISTS' if if_exists else u'', + name=datastore_helpers.identifier(name)) + + try: + _write_engine_execute(sql) + except ProgrammingError as pe: + raise ValidationError({u'name': [_programming_error_summary(pe)]}) + + +def _write_engine_execute(sql): + connection = get_write_engine().connect() + # No special meaning for '%' in sql parameter: + connection = connection.execution_options(no_parameters=True) + trans = connection.begin() + try: + connection.execute(sql) + trans.commit() + except Exception: + trans.rollback() + raise + finally: + connection.close() + + +def _programming_error_summary(pe): + u''' + return the text description of a sqlalchemy DatabaseError + without the actual SQL included, for raising as a + ValidationError to send back to API users + ''' + # first line only, after the '(ProgrammingError)' text + message = pe.args[0].split('\n')[0].decode('utf8') + return message.split(u') ', 1)[-1] diff --git a/ckanext/datastore/logic/action.py b/ckanext/datastore/logic/action.py index 27e74599668..6b1b59e828a 100644 --- a/ckanext/datastore/logic/action.py +++ b/ckanext/datastore/logic/action.py @@ -55,6 +55,13 @@ def datastore_create(context, data_dict): :type primary_key: list or comma separated string :param indexes: indexes on table (optional) :type indexes: list or comma separated string + :param triggers: trigger functions to apply to this table on update/insert. + functions may be created with + :meth:`~ckanext.datastore.logic.action.datastore_function_create`. + eg: [ + {"function": "trigger_clean_reference"}, + {"function": "trigger_check_codes"}] + :type triggers: list of dictionaries Please note that setting the ``aliases``, ``indexes`` or ``primary_key`` replaces the exising aliases or constraints. Setting ``records`` appends the provided records to the resource. @@ -122,8 +129,6 @@ def datastore_create(context, data_dict): resource_id = data_dict['resource_id'] _check_read_only(context, resource_id) - data_dict['connection_url'] = config['ckan.datastore.write_url'] - # validate aliases aliases = datastore_helpers.get_list(data_dict.get('aliases', [])) for alias in aliases: @@ -153,10 +158,40 @@ def datastore_create(context, data_dict): result.pop('id', None) result.pop('private', None) - result.pop('connection_url') return result +def datastore_run_triggers(context, data_dict): + ''' update each record with trigger + + The datastore_run_triggers API action allows you to re-apply exisitng triggers to + an existing DataStore resource. + + :param resource_id: resource id that the data is going to be stored under. + :type resource_id: string + + **Results:** + + :returns: The rowcount in the table. 
+ :rtype: int + + ''' + res_id = data_dict['resource_id'] + p.toolkit.check_access('datastore_trigger_each_row', context, data_dict) + + connection = db.get_write_engine().connect() + + sql = sqlalchemy.text(u'''update {0} set _id=_id '''.format( + datastore_helpers.identifier(res_id))) + try: + results = connection.execute(sql) + except sqlalchemy.exc.DatabaseError as err: + message = err.args[0].split('\n')[0].decode('utf8') + raise p.toolkit.ValidationError({ + u'records': [message.split(u') ', 1)[-1]]}) + return results.rowcount + + def datastore_upsert(context, data_dict): '''Updates or inserts into a table in the DataStore @@ -207,12 +242,10 @@ def datastore_upsert(context, data_dict): resource_id = data_dict['resource_id'] _check_read_only(context, resource_id) - data_dict['connection_url'] = config['ckan.datastore.write_url'] - res_id = data_dict['resource_id'] resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata" WHERE name = :id AND alias_of IS NULL''') - results = db._get_engine(data_dict).execute(resources_sql, id=res_id) + results = db.get_write_engine().execute(resources_sql, id=res_id) res_exists = results.rowcount > 0 if not res_exists: @@ -222,7 +255,6 @@ def datastore_upsert(context, data_dict): result = db.upsert(context, data_dict) result.pop('id', None) - result.pop('connection_url') return result @@ -249,11 +281,9 @@ def _type_lookup(t): resource_id = _get_or_bust(data_dict, 'id') resource = p.toolkit.get_action('resource_show')(context, {'id':resource_id}) - data_dict['connection_url'] = config['ckan.datastore.read_url'] - resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata" WHERE name = :id AND alias_of IS NULL''') - results = db._get_engine(data_dict).execute(resources_sql, id=resource_id) + results = db.get_read_engine().execute(resources_sql, id=resource_id) res_exists = results.rowcount > 0 if not res_exists: raise p.toolkit.ObjectNotFound(p.toolkit._( @@ -269,7 +299,7 @@ def _type_lookup(t): SELECT column_name, data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name = :resource_id; ''') - schema_results = db._get_engine(data_dict).execute(schema_sql, resource_id=resource_id) + schema_results = db.get_read_engine().execute(schema_sql, resource_id=resource_id) for row in schema_results.fetchall(): k = row[0] v = row[1] @@ -282,7 +312,7 @@ def _type_lookup(t): meta_sql = sqlalchemy.text(u''' SELECT count(_id) FROM "{0}"; '''.format(resource_id)) - meta_results = db._get_engine(data_dict).execute(meta_sql, resource_id=resource_id) + meta_results = db.get_read_engine().execute(meta_sql, resource_id=resource_id) info['meta']['count'] = meta_results.fetchone()[0] finally: if schema_results: @@ -334,12 +364,10 @@ def datastore_delete(context, data_dict): resource_id = data_dict['resource_id'] _check_read_only(context, resource_id) - data_dict['connection_url'] = config['ckan.datastore.write_url'] - res_id = data_dict['resource_id'] resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata" WHERE name = :id AND alias_of IS NULL''') - results = db._get_engine(data_dict).execute(resources_sql, id=res_id) + results = db.get_read_engine().execute(resources_sql, id=res_id) res_exists = results.rowcount > 0 if not res_exists: @@ -361,7 +389,6 @@ def datastore_delete(context, data_dict): set_datastore_active_flag(model, data_dict, False) result.pop('id', None) - result.pop('connection_url') return result @@ -397,6 +424,16 @@ def datastore_search(context, data_dict): :param sort: comma separated field names with ordering e.g.: 
"fieldname1, fieldname2 desc" :type sort: string + :param include_total: True to return total matching record count + (optional, default: true) + :type include_total: bool + :param records_format: the format for the records return value: + 'objects' (default) list of {fieldname1: value1, ...} dicts, + 'lists' list of [value1, value2, ...] lists, + 'csv' string containing comma-separated values with no header, + 'tsv' string containing tab-separated values with no header + :type records_format: controlled list + Setting the ``plain`` flag to false enables the entire PostgreSQL `full text search query language`_. @@ -422,7 +459,7 @@ def datastore_search(context, data_dict): :param total: number of total matching records :type total: int :param records: list of matching results - :type records: list of dictionaries + :type records: depends on records_format value passed ''' schema = context.get('schema', dsschema.datastore_search_schema()) @@ -431,11 +468,13 @@ def datastore_search(context, data_dict): raise p.toolkit.ValidationError(errors) res_id = data_dict['resource_id'] - data_dict['connection_url'] = config['ckan.datastore.write_url'] resources_sql = sqlalchemy.text(u'''SELECT alias_of FROM "_table_metadata" WHERE name = :id''') - results = db._get_engine(data_dict).execute(resources_sql, id=res_id) + # XXX: write connection because of private tables, we + # should be able to make this read once we stop using pg + # permissions enforcement + results = db.get_write_engine().execute(resources_sql, id=res_id) # Resource only has to exist in the datastore (because it could be an alias) if not results.rowcount > 0: @@ -453,7 +492,6 @@ def datastore_search(context, data_dict): result = db.search(context, data_dict) result.pop('id', None) - result.pop('connection_url') return result @@ -495,11 +533,8 @@ def datastore_search_sql(context, data_dict): p.toolkit.check_access('datastore_search_sql', context, data_dict) - data_dict['connection_url'] = config['ckan.datastore.read_url'] - result = db.search_sql(context, data_dict) result.pop('id', None) - result.pop('connection_url') return result @@ -518,8 +553,6 @@ def datastore_make_private(context, data_dict): data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') - data_dict['connection_url'] = config['ckan.datastore.write_url'] - if not _resource_exists(context, data_dict): raise p.toolkit.ObjectNotFound(p.toolkit._( u'Resource "{0}" was not found.'.format(res_id) @@ -544,8 +577,6 @@ def datastore_make_public(context, data_dict): data_dict['resource_id'] = data_dict['id'] res_id = _get_or_bust(data_dict, 'resource_id') - data_dict['connection_url'] = config['ckan.datastore.write_url'] - if not _resource_exists(context, data_dict): raise p.toolkit.ObjectNotFound(p.toolkit._( u'Resource "{0}" was not found.'.format(res_id) @@ -615,7 +646,7 @@ def _resource_exists(context, data_dict): resources_sql = sqlalchemy.text(u'''SELECT 1 FROM "_table_metadata" WHERE name = :id AND alias_of IS NULL''') - results = db._get_engine(data_dict).execute(resources_sql, id=res_id) + results = db.get_read_engine().execute(resources_sql, id=res_id) return results.rowcount > 0 @@ -630,3 +661,42 @@ def _check_read_only(context, resource_id): 'read-only': ['Cannot edit read-only resource. 
Either pass' '"force=True" or change url-type to "datastore"'] }) + + +@logic.validate(dsschema.datastore_function_create_schema) +def datastore_function_create(context, data_dict): + u''' + Create a trigger function for use with datastore_create + + :param name: function name + :type name: string + :param or_replace: True to replace if function already exists + (default: False) + :type or_replace: bool + :param rettype: set to 'trigger' + (only trigger functions may be created at this time) + :type rettype: string + :param definition: PL/pgSQL function body for trigger function + :type definition: string + ''' + p.toolkit.check_access('datastore_function_create', context, data_dict) + + db.create_function( + name=data_dict['name'], + arguments=data_dict.get('arguments', []), + rettype=data_dict['rettype'], + definition=data_dict['definition'], + or_replace=data_dict['or_replace']) + + +@logic.validate(dsschema.datastore_function_delete_schema) +def datastore_function_delete(context, data_dict): + u''' + Delete a trigger function + + :param name: function name + :type name: string + ''' + p.toolkit.check_access('datastore_function_delete', context, data_dict) + + db.drop_function(data_dict['name'], data_dict['if_exists']) diff --git a/ckanext/datastore/logic/auth.py b/ckanext/datastore/logic/auth.py index fc54f82bd5e..0c9de0c5201 100644 --- a/ckanext/datastore/logic/auth.py +++ b/ckanext/datastore/logic/auth.py @@ -58,3 +58,17 @@ def datastore_search_sql(context, data_dict): def datastore_change_permissions(context, data_dict): return datastore_auth(context, data_dict) + + +def datastore_function_create(context, data_dict): + '''sysadmin-only: functions can be used to skip access checks''' + return {'success': False} + + +def datastore_function_delete(context, data_dict): + return {'success': False} + + +def datastore_run_triggers(context, data_dict): + '''sysadmin-only: functions can be used to skip access checks''' + return {'success': False} diff --git a/ckanext/datastore/logic/schema.py b/ckanext/datastore/logic/schema.py index d2eecc4caf9..903e016d71c 100644 --- a/ckanext/datastore/logic/schema.py +++ b/ckanext/datastore/logic/schema.py @@ -16,6 +16,7 @@ boolean_validator = get_validator('boolean_validator') int_validator = get_validator('int_validator') OneOf = get_validator('OneOf') +unicode_only = get_validator('unicode_only') default = get_validator('default') @@ -107,6 +108,17 @@ def datastore_create_schema(): }, 'primary_key': [ignore_missing, list_of_strings_or_string], 'indexes': [ignore_missing, list_of_strings_or_string], + 'triggers': { + 'when': [ + default(u'before insert or update'), + unicode_only, + OneOf([u'before insert or update'])], + 'for_each': [ + default(u'row'), + unicode_only, + OneOf([u'row'])], + 'function': [not_empty, unicode_only], + }, '__junk': [empty], '__before': [rename('id', 'resource_id')] } @@ -151,7 +163,31 @@ def datastore_search_schema(): 'sort': [ignore_missing, list_of_strings_or_string], 'distinct': [ignore_missing, boolean_validator], 'include_total': [default(True), boolean_validator], + 'records_format': [ + default(u'objects'), + OneOf([u'objects', u'lists', u'csv', u'tsv'])], '__junk': [empty], '__before': [rename('id', 'resource_id')] } return schema + + +def datastore_function_create_schema(): + return { + 'name': [unicode_only, not_empty], + 'or_replace': [default(False), boolean_validator], + # we're only exposing functions for triggers at the moment + 'arguments': { + 'argname': [unicode_only, not_empty], + 'argtype': 
[unicode_only, not_empty], + }, + 'rettype': [default(u'void'), unicode_only], + 'definition': [unicode_only], + } + + +def datastore_function_delete_schema(): + return { + 'name': [unicode_only, not_empty], + 'if_exists': [default(False), boolean_validator], + } diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index dc35bd96f28..e91712b1689 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -34,9 +34,7 @@ def _is_legacy_mode(config): Returns True if `ckan.datastore.read_url` is not set in the provided config object or CKAN is running on Postgres < 9.x ''' - write_url = config.get('ckan.datastore.write_url') - - engine = db._get_engine({'connection_url': write_url}) + engine = db.get_write_engine() connection = engine.connect() return (not config.get('ckan.datastore.read_url') or @@ -112,8 +110,7 @@ def configure(self, config): else: self.read_url = self.config['ckan.datastore.read_url'] - self.read_engine = db._get_engine( - {'connection_url': self.read_url}) + self.read_engine = db.get_read_engine() if not model.engine_is_pg(self.read_engine): log.warn('We detected that you do not use a PostgreSQL ' 'database. The DataStore will NOT work and DataStore ' @@ -140,7 +137,6 @@ def notify(self, entity, operation=None): for resource in entity.resources: try: func(context, { - 'connection_url': self.write_url, 'resource_id': resource.id}) except p.toolkit.ObjectNotFound: pass @@ -174,7 +170,7 @@ def _is_read_only_database(self): ''' Returns True if no connection has CREATE privileges on the public schema. This is the case if replication is enabled.''' for url in [self.ckan_url, self.write_url, self.read_url]: - connection = db._get_engine({'connection_url': url}).connect() + connection = db._get_engine_from_url(url).connect() try: sql = u"SELECT has_schema_privilege('public', 'CREATE')" is_writable = connection.execute(sql).first()[0] @@ -200,8 +196,7 @@ def _read_connection_has_correct_privileges(self): only user. A table is created by the write user to test the read only user. 
''' - write_connection = db._get_engine( - {'connection_url': self.write_url}).connect() + write_connection = db.get_write_engine().connect() read_connection_user = sa_url.make_url(self.read_url).username drop_foo_sql = u'DROP TABLE IF EXISTS _foo' @@ -222,12 +217,16 @@ def _read_connection_has_correct_privileges(self): return True def get_actions(self): - actions = {'datastore_create': action.datastore_create, - 'datastore_upsert': action.datastore_upsert, - 'datastore_delete': action.datastore_delete, - 'datastore_search': action.datastore_search, - 'datastore_info': action.datastore_info, - } + actions = { + 'datastore_create': action.datastore_create, + 'datastore_upsert': action.datastore_upsert, + 'datastore_delete': action.datastore_delete, + 'datastore_search': action.datastore_search, + 'datastore_info': action.datastore_info, + 'datastore_function_create': action.datastore_function_create, + 'datastore_function_delete': action.datastore_function_delete, + 'datastore_run_triggers': action.datastore_run_triggers, + } if not self.legacy_mode: if self.enable_sql_search: # Only enable search_sql if the config does not disable it @@ -239,13 +238,18 @@ def get_actions(self): return actions def get_auth_functions(self): - return {'datastore_create': auth.datastore_create, - 'datastore_upsert': auth.datastore_upsert, - 'datastore_delete': auth.datastore_delete, - 'datastore_info': auth.datastore_info, - 'datastore_search': auth.datastore_search, - 'datastore_search_sql': auth.datastore_search_sql, - 'datastore_change_permissions': auth.datastore_change_permissions} + return { + 'datastore_create': auth.datastore_create, + 'datastore_upsert': auth.datastore_upsert, + 'datastore_delete': auth.datastore_delete, + 'datastore_info': auth.datastore_info, + 'datastore_search': auth.datastore_search, + 'datastore_search_sql': auth.datastore_search_sql, + 'datastore_change_permissions': auth.datastore_change_permissions, + 'datastore_function_create': auth.datastore_function_create, + 'datastore_function_delete': auth.datastore_function_delete, + 'datastore_run_triggers': auth.datastore_run_triggers, + } def before_map(self, m): m.connect( @@ -388,8 +392,24 @@ def datastore_search(self, context, data_dict, fields_types, query_dict): sort = self._sort(data_dict, fields_types) where = self._where(data_dict, fields_types) - select_cols = [ - datastore_helpers.identifier(field_id) for field_id in field_ids] + select_cols = [] + records_format = data_dict.get(u'records_format') + json_values = records_format in (u'objects', u'lists') + for field_id in field_ids: + fmt = u'to_json({0})' if records_format == u'lists' else u'{0}' + typ = fields_types.get(field_id) + if typ == u'nested': + fmt = u'({0}).json' + elif typ == u'timestamp': + fmt = u"to_char({0}, 'YYYY-MM-DD\"T\"HH24:MI:SS')" + if json_values: + fmt = u"to_json({0})".format(fmt) + elif typ.startswith(u'_') or typ.endswith(u'[]'): + fmt = u'array_to_json({0})' + if records_format == u'objects': + fmt += u' as {0}' + select_cols.append(fmt.format( + datastore_helpers.identifier(field_id))) if rank_column: select_cols.append(rank_column) diff --git a/ckanext/datastore/tests/helpers.py b/ckanext/datastore/tests/helpers.py index 7e707a0e75e..277253b82a6 100644 --- a/ckanext/datastore/tests/helpers.py +++ b/ckanext/datastore/tests/helpers.py @@ -1,9 +1,13 @@ # encoding: utf-8 +from sqlalchemy import orm + import ckan.model as model import ckan.lib.cli as cli import ckan.plugins as p +from ckan.tests.helpers import FunctionalTestBase +from 
ckanext.datastore import db def extract(d, keys): @@ -17,6 +21,17 @@ def clear_db(Session): results = c.execute(drop_tables) for result in results: c.execute(result[0]) + + drop_functions_sql = u''' + SELECT 'drop function ' || quote_ident(proname) || '();' + FROM pg_proc + INNER JOIN pg_namespace ns ON (pg_proc.pronamespace = ns.oid) + WHERE ns.nspname = 'public' + ''' + drop_functions = u''.join(r[0] for r in c.execute(drop_functions_sql)) + if drop_functions: + c.execute(drop_functions) + Session.commit() Session.remove() @@ -42,3 +57,13 @@ def set_url_type(resources, user): context, {'id': resource.id}) resource['url_type'] = 'datastore' p.toolkit.get_action('resource_update')(context, resource) + + +class DatastoreFunctionalTestBase(FunctionalTestBase): + _load_plugins = (u'datastore', ) + + @classmethod + def setup_class(cls): + engine = db.get_write_engine() + rebuild_all_dbs(orm.scoped_session(orm.sessionmaker(bind=engine))) + super(DatastoreFunctionalTestBase, cls).setup_class() diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py index a95bcf68ef8..ba8c94adf08 100644 --- a/ckanext/datastore/tests/test_create.py +++ b/ckanext/datastore/tests/test_create.py @@ -18,7 +18,9 @@ import ckan.tests.factories as factories import ckanext.datastore.db as db -from ckanext.datastore.tests.helpers import rebuild_all_dbs, set_url_type +from ckanext.datastore.tests.helpers import ( + rebuild_all_dbs, set_url_type, DatastoreFunctionalTestBase) +from ckan.plugins.toolkit import ValidationError class TestDatastoreCreateNewTests(object): @@ -193,8 +195,7 @@ def _get_index_names(self, resource_id): return [result[0] for result in results] def _execute_sql(self, sql, *args): - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() session = orm.scoped_session(orm.sessionmaker(bind=engine)) return session.connection().execute(sql, *args) @@ -255,8 +256,7 @@ def setup_class(cls): ctd.CreateTestData.create() cls.sysadmin_user = model.User.get('testsysadmin') cls.normal_user = model.User.get('annafan') - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) set_url_type( model.Package.get('annakarenina').resources, cls.sysadmin_user) @@ -907,3 +907,201 @@ def test_datastore_create_with_invalid_data_value(self): assert res_dict['success'] is False assert res_dict['error']['__type'] == 'Validation Error' assert res_dict['error']['message'].startswith('The data was invalid') + + +class TestDatastoreFunctionCreate(DatastoreFunctionalTestBase): + def test_nop_trigger(self): + helpers.call_action( + u'datastore_function_create', + name=u'test_nop', + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + + def test_invalid_definition(self): + try: + helpers.call_action( + u'datastore_function_create', + name=u'test_invalid_def', + rettype=u'trigger', + definition=u'HELLO WORLD') + except ValidationError as ve: + assert_equal( + ve.error_dict, + {u'definition': + [u'syntax error at or near "HELLO"']}) + else: + assert 0, u'no validation error' + + def test_redefined_trigger(self): + helpers.call_action( + u'datastore_function_create', + name=u'test_redefined', + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + try: + helpers.call_action( + u'datastore_function_create', + name=u'test_redefined', + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + except 
ValidationError as ve: + assert_equal( + ve.error_dict, + {u'name':[ + u'function "test_redefined" already exists ' + u'with same argument types']}) + else: + assert 0, u'no validation error' + + def test_redefined_with_or_replace_trigger(self): + helpers.call_action( + u'datastore_function_create', + name=u'test_replaceme', + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + helpers.call_action( + u'datastore_function_create', + name=u'test_replaceme', + or_replace=True, + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + + +class TestDatastoreCreateTriggers(DatastoreFunctionalTestBase): + def test_create_with_missing_trigger(self): + ds = factories.Dataset() + + try: + helpers.call_action( + u'datastore_create', + resource={u'package_id': ds['id']}, + fields=[{u'id': u'spam', u'type': u'text'}], + records=[{u'spam': u'SPAM'}, {u'spam': u'EGGS'}], + triggers=[{u'function': u'no_such_trigger_function'}]) + except ValidationError as ve: + assert_equal( + ve.error_dict, + {u'triggers':[ + u'function no_such_trigger_function() does not exist']}) + else: + assert 0, u'no validation error' + + def test_create_trigger_applies_to_records(self): + ds = factories.Dataset() + + helpers.call_action( + u'datastore_function_create', + name=u'spamify_trigger', + rettype=u'trigger', + definition=u''' + BEGIN + NEW.spam := 'spam spam ' || NEW.spam || ' spam'; + RETURN NEW; + END;''') + res = helpers.call_action( + u'datastore_create', + resource={u'package_id': ds['id']}, + fields=[{u'id': u'spam', u'type': u'text'}], + records=[{u'spam': u'SPAM'}, {u'spam': u'EGGS'}], + triggers=[{u'function': u'spamify_trigger'}]) + assert_equal( + helpers.call_action( + u'datastore_search', + fields=[u'spam'], + resource_id=res['resource_id'])['records'], + [ + {u'spam': u'spam spam SPAM spam'}, + {u'spam': u'spam spam EGGS spam'}]) + + def test_upsert_trigger_applies_to_records(self): + ds = factories.Dataset() + + helpers.call_action( + u'datastore_function_create', + name=u'more_spam_trigger', + rettype=u'trigger', + definition=u''' + BEGIN + NEW.spam := 'spam spam ' || NEW.spam || ' spam'; + RETURN NEW; + END;''') + res = helpers.call_action( + u'datastore_create', + resource={u'package_id': ds['id']}, + fields=[{u'id': u'spam', u'type': u'text'}], + triggers=[{u'function': u'more_spam_trigger'}]) + helpers.call_action( + u'datastore_upsert', + method=u'insert', + resource_id=res['resource_id'], + records=[{u'spam': u'BEANS'}, {u'spam': u'SPAM'}]) + assert_equal( + helpers.call_action( + u'datastore_search', + fields=[u'spam'], + resource_id=res['resource_id'])['records'], + [ + {u'spam': u'spam spam BEANS spam'}, + {u'spam': u'spam spam SPAM spam'}]) + + def test_create_trigger_exception(self): + ds = factories.Dataset() + + helpers.call_action( + u'datastore_function_create', + name=u'spamexception_trigger', + rettype=u'trigger', + definition=u''' + BEGIN + IF NEW.spam != 'spam' THEN + RAISE EXCEPTION '"%"? Yeeeeccch!', NEW.spam; + END IF; + RETURN NEW; + END;''') + try: + res = helpers.call_action( + u'datastore_create', + resource={u'package_id': ds['id']}, + fields=[{u'id': u'spam', u'type': u'text'}], + records=[{u'spam': u'spam'}, {u'spam': u'EGGS'}], + triggers=[{u'function': u'spamexception_trigger'}]) + except ValidationError as ve: + assert_equal( + ve.error_dict, + {u'records':[ + u'"EGGS"? 
Yeeeeccch!']}) + else: + assert 0, u'no validation error' + + def test_upsert_trigger_exception(self): + ds = factories.Dataset() + + helpers.call_action( + u'datastore_function_create', + name=u'spamonly_trigger', + rettype=u'trigger', + definition=u''' + BEGIN + IF NEW.spam != 'spam' THEN + RAISE EXCEPTION '"%"? Yeeeeccch!', NEW.spam; + END IF; + RETURN NEW; + END;''') + res = helpers.call_action( + u'datastore_create', + resource={u'package_id': ds['id']}, + fields=[{u'id': u'spam', u'type': u'text'}], + triggers=[{u'function': u'spamonly_trigger'}]) + try: + helpers.call_action( + u'datastore_upsert', + method=u'insert', + resource_id=res['resource_id'], + records=[{u'spam': u'spam'}, {u'spam': u'BEANS'}]) + except ValidationError as ve: + assert_equal( + ve.error_dict, + {u'records':[ + u'"BEANS"? Yeeeeccch!']}) + else: + assert 0, u'no validation error' diff --git a/ckanext/datastore/tests/test_delete.py b/ckanext/datastore/tests/test_delete.py index f5c14fd4944..4c806808358 100644 --- a/ckanext/datastore/tests/test_delete.py +++ b/ckanext/datastore/tests/test_delete.py @@ -2,6 +2,7 @@ import json import nose +from nose.tools import assert_equal import sqlalchemy import sqlalchemy.orm as orm @@ -10,13 +11,16 @@ import ckan.lib.create_test_data as ctd import ckan.model as model import ckan.tests.legacy as tests +from ckan.tests import helpers from ckan.common import config +from ckan.plugins.toolkit import ValidationError import ckan.tests.factories as factories import ckan.tests.helpers as helpers from ckan.logic import NotFound import ckanext.datastore.db as db -from ckanext.datastore.tests.helpers import rebuild_all_dbs, set_url_type +from ckanext.datastore.tests.helpers import ( + rebuild_all_dbs, set_url_type, DatastoreFunctionalTestBase) assert_raises = nose.tools.assert_raises @@ -47,8 +51,7 @@ def setup_class(cls): 'rating with %': '42%'}] } - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) set_url_type( model.Package.get('annakarenina').resources, cls.sysadmin_user) @@ -259,3 +262,33 @@ def test_delete_with_blank_filters(self): assert(len(results['result']['records']) == 0) self._delete() + + +class TestDatastoreFunctionDelete(DatastoreFunctionalTestBase): + def test_create_delete(self): + helpers.call_action( + u'datastore_function_create', + name=u'test_nop', + rettype=u'trigger', + definition=u'BEGIN RETURN NEW; END;') + helpers.call_action( + u'datastore_function_delete', + name=u'test_nop') + + def test_delete_nonexistant(self): + try: + helpers.call_action( + u'datastore_function_delete', + name=u'test_not_there') + except ValidationError as ve: + assert_equal( + ve.error_dict, + {u'name': [u'function test_not_there() does not exist']}) + else: + assert 0, u'no validation error' + + def test_delete_if_exitst(self): + helpers.call_action( + u'datastore_function_delete', + name=u'test_not_there_either', + if_exists=True) diff --git a/ckanext/datastore/tests/test_dump.py b/ckanext/datastore/tests/test_dump.py index c460db873de..ad64bc47668 100644 --- a/ckanext/datastore/tests/test_dump.py +++ b/ckanext/datastore/tests/test_dump.py @@ -90,8 +90,7 @@ def setup_class(cls): res_dict = json.loads(res.body) assert res_dict['success'] is True - engine = db._get_engine({ - 'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod @@ -109,7 +108,7 
@@ def test_dump_basic(self): u',characters,random_letters,nested') assert_equals(content[:len(expected)], expected) assert_in('warandpeace', content) - assert_in('"[""Princess Anna"", ""Sergius""]"', content) + assert_in('"[""Princess Anna"",""Sergius""]"', content) # get with alias instead of id res = self.app.get('/datastore/dump/{0}'.format(str( @@ -129,9 +128,9 @@ def test_dump_limit(self): expected_content = ( u'_id,b\xfck,author,published,characters,random_letters,' u'nested\r\n1,annakarenina,tolstoy,2005-03-01T00:00:00,' - u'"[""Princess Anna"", ""Sergius""]",' - u'"[""a"", ""e"", ""x""]","[""b"", ' - u'{""moo"": ""moo""}]"\r\n') + u'"[""Princess Anna"",""Sergius""]",' + u'"[""a"",""e"",""x""]","[""b"", ' + u'{""moo"": ""moo""}]"\n') assert_equals(content, expected_content) def test_dump_tsv(self): @@ -143,9 +142,9 @@ def test_dump_tsv(self): expected_content = ( u'_id\tb\xfck\tauthor\tpublished\tcharacters\trandom_letters\t' u'nested\r\n1\tannakarenina\ttolstoy\t2005-03-01T00:00:00\t' - u'"[""Princess Anna"", ""Sergius""]"\t' - u'"[""a"", ""e"", ""x""]"\t"[""b"", ' - u'{""moo"": ""moo""}]"\r\n') + u'"[""Princess Anna"",""Sergius""]"\t' + u'"[""a"",""e"",""x""]"\t"[""b"", ' + u'{""moo"": ""moo""}]"\n') assert_equals(content, expected_content) def test_dump_json(self): @@ -154,7 +153,7 @@ def test_dump_json(self): str(self.data['resource_id'])), extra_environ=auth) content = res.body.decode('utf-8') expected_content = ( - u'{\n "fields": [{"type":"int4","id":"_id"},{"type":"text",' + u'{\n "fields": [{"type":"int","id":"_id"},{"type":"text",' u'"id":"b\xfck"},{"type":"text","id":"author"},{"type":"timestamp"' u',"id":"published"},{"type":"_text","id":"characters"},' u'{"type":"_text","id":"random_letters"},{"type":"json",' diff --git a/ckanext/datastore/tests/test_helpers.py b/ckanext/datastore/tests/test_helpers.py index 9cc05c3dded..eed67000c20 100644 --- a/ckanext/datastore/tests/test_helpers.py +++ b/ckanext/datastore/tests/test_helpers.py @@ -70,9 +70,7 @@ def setup_class(cls): if not config.get('ckan.datastore.read_url'): raise nose.SkipTest('Datastore runs on legacy mode, skipping...') - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']} - ) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) datastore_test_helpers.clear_db(cls.Session) diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py index c472b52bdc3..46eac61a288 100644 --- a/ckanext/datastore/tests/test_search.py +++ b/ckanext/datastore/tests/test_search.py @@ -20,6 +20,7 @@ assert_equals = nose.tools.assert_equals assert_raises = nose.tools.assert_raises +assert_in = nose.tools.assert_in class TestDatastoreSearchNewTest(object): @@ -165,9 +166,7 @@ def setup_class(cls): u'characters': None, u'rating with %': u'99%'}] - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']} - ) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod @@ -512,14 +511,13 @@ def test_search_full_text(self): ) for record in result['records']] assert results == self.expected_records, result['records'] - expected_fields = [{u'type': u'int4', u'id': u'_id'}, + expected_fields = [{u'type': u'int', u'id': u'_id'}, {u'type': u'text', u'id': u'b\xfck'}, {u'type': u'text', u'id': u'author'}, {u'type': u'timestamp', u'id': u'published'}, - {u'type': u'json', u'id': u'nested'}, - {u'type': u'float4', u'id': u'rank'}] + {u'type': u'json', u'id': u'nested'}] 
for field in expected_fields: - assert field in result['fields'], field + assert_in(field, result['fields']) # test multiple word queries (connected with and) data = {'resource_id': self.data['resource_id'], @@ -838,8 +836,7 @@ def setup_class(cls): u'published': None}] cls.expected_join_results = [{u'first': 1, u'second': 1}, {u'first': 1, u'second': 2}] - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod @@ -941,8 +938,7 @@ def test_read_private(self): 'user': self.sysadmin_user.name, 'model': model} data_dict = { - 'resource_id': self.data['resource_id'], - 'connection_url': config['ckan.datastore.write_url']} + 'resource_id': self.data['resource_id']} p.toolkit.get_action('datastore_make_private')(context, data_dict) query = 'SELECT * FROM "{0}"'.format(self.data['resource_id']) data = {'sql': query} diff --git a/ckanext/datastore/tests/test_unit.py b/ckanext/datastore/tests/test_unit.py index c02d30c60d6..2a74e3386cd 100644 --- a/ckanext/datastore/tests/test_unit.py +++ b/ckanext/datastore/tests/test_unit.py @@ -35,8 +35,7 @@ def test_is_valid_table_name(self): def test_pg_version_check(self): if not tests.is_datastore_supported(): raise nose.SkipTest("Datastore not supported") - engine = db._get_engine( - {'connection_url': config['sqlalchemy.url']}) + engine = db._get_engine_from_url(config['sqlalchemy.url']) connection = engine.connect() assert db._pg_version_is_at_least(connection, '8.0') assert not db._pg_version_is_at_least(connection, '10.0') diff --git a/ckanext/datastore/tests/test_upsert.py b/ckanext/datastore/tests/test_upsert.py index b7a09bf7dae..e798bec38dc 100644 --- a/ckanext/datastore/tests/test_upsert.py +++ b/ckanext/datastore/tests/test_upsert.py @@ -113,8 +113,7 @@ def setup_class(cls): res_dict = json.loads(res.body) assert res_dict['success'] is True - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod @@ -365,8 +364,7 @@ def setup_class(cls): res_dict = json.loads(res.body) assert res_dict['success'] is True - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod @@ -472,8 +470,7 @@ def setup_class(cls): res_dict = json.loads(res.body) assert res_dict['success'] is True - engine = db._get_engine( - {'connection_url': config['ckan.datastore.write_url']}) + engine = db.get_write_engine() cls.Session = orm.scoped_session(orm.sessionmaker(bind=engine)) @classmethod diff --git a/ckanext/datastore/writer.py b/ckanext/datastore/writer.py index 5cf75e4efeb..47907978d0a 100644 --- a/ckanext/datastore/writer.py +++ b/ckanext/datastore/writer.py @@ -2,20 +2,12 @@ from contextlib import contextmanager from email.utils import encode_rfc2231 -import json +from simplejson import dumps from xml.etree.cElementTree import Element, SubElement, ElementTree import unicodecsv -UTF8_BOM = u'\uFEFF'.encode(u'utf-8') - - -def _json_dump_nested(value): - is_nested = isinstance(value, (list, dict)) - - if is_nested: - return json.dumps(value) - return value +from codecs import BOM_UTF8 @contextmanager @@ -27,10 +19,6 @@ def csv_writer(response, fields, name=None, bom=False): :param fields: list of datastore fields :param name: file name (for headers, response-like objects 
only) :param bom: True to include a UTF-8 BOM at the start of the file - - >>> with csv_writer(response, fields) as d: - >>> d.writerow(row1) - >>> d.writerow(row2) ''' if hasattr(response, u'headers'): @@ -39,11 +27,12 @@ def csv_writer(response, fields, name=None, bom=False): response.headers['Content-disposition'] = ( b'attachment; filename="{name}.csv"'.format( name=encode_rfc2231(name))) - wr = CSVWriter(response, fields, encoding=u'utf-8') if bom: - response.write(UTF8_BOM) - wr.writerow(f['id'] for f in fields) - yield wr + response.write(BOM_UTF8) + + unicodecsv.writer(response, encoding=u'utf-8').writerow( + f['id'] for f in fields) + yield TextWriter(response) @contextmanager @@ -55,10 +44,6 @@ def tsv_writer(response, fields, name=None, bom=False): :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - - >>> with tsv_writer(response, fields) as d: - >>> d.writerow(row1) - >>> d.writerow(row2) ''' if hasattr(response, u'headers'): @@ -68,23 +53,22 @@ def tsv_writer(response, fields, name=None, bom=False): response.headers['Content-disposition'] = ( b'attachment; filename="{name}.tsv"'.format( name=encode_rfc2231(name))) - wr = CSVWriter( - response, fields, encoding=u'utf-8', dialect=unicodecsv.excel_tab, - ) if bom: - response.write(UTF8_BOM) - wr.writerow(f['id'] for f in fields) - yield wr + response.write(BOM_UTF8) + unicodecsv.writer( + response, encoding=u'utf-8', dialect=unicodecsv.excel_tab).writerow( + f['id'] for f in fields) + yield TextWriter(response) -class CSVWriter(object): - def __init__(self, response, columns, *args, **kwargs): - self._wr = unicodecsv.writer(response, *args, **kwargs) - self.columns = columns - def writerow(self, row): - return self._wr.writerow([ - _json_dump_nested(val) for val in row]) +class TextWriter(object): + u'text in, text out' + def __init__(self, response): + self.response = response + + def write_records(self, records): + self.response.write(records) @contextmanager @@ -96,10 +80,6 @@ def json_writer(response, fields, name=None, bom=False): :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - - >>> with json_writer(response, fields) as d: - >>> d.writerow(row1) - >>> d.writerow(row2) ''' if hasattr(response, u'headers'): @@ -110,31 +90,29 @@ def json_writer(response, fields, name=None, bom=False): b'attachment; filename="{name}.json"'.format( name=encode_rfc2231(name))) if bom: - response.write(UTF8_BOM) + response.write(BOM_UTF8) response.write( - b'{\n "fields": %s,\n "records": [' % json.dumps( + b'{\n "fields": %s,\n "records": [' % dumps( fields, ensure_ascii=False, separators=(u',', u':'))) - yield JSONWriter(response, [f['id'] for f in fields]) + yield JSONWriter(response) response.write(b'\n]}\n') class JSONWriter(object): - def __init__(self, response, columns): + def __init__(self, response): self.response = response - self.columns = columns self.first = True - def writerow(self, row): - if self.first: - self.first = False - self.response.write(b'\n ') - else: - self.response.write(b',\n ') - self.response.write(json.dumps( - row, - ensure_ascii=False, - separators=(u',', u':'), - sort_keys=True).encode(u'utf-8')) + def write_records(self, records): + for r in records: + if self.first: + self.first = False + self.response.write(b'\n ') + else: + self.response.write(b',\n ') + + 
self.response.write(dumps( + r, ensure_ascii=False, separators=(u',', u':'))) @@ -146,10 +124,6 @@ def xml_writer(response, fields, name=None, bom=False): :param fields: list of datastore fields :param name: file name (for headers, response-like objects only) :param bom: True to include a UTF-8 BOM at the start of the file - - >>> with xml_writer(response, fields) as d: - >>> d.writerow(row1) - >>> d.writerow(row2) ''' if hasattr(response, u'headers'): @@ -160,7 +134,7 @@ response.headers['Content-disposition'] = ( b'attachment; filename="{name}.xml"'.format( name=encode_rfc2231(name))) if bom: - response.write(UTF8_BOM) + response.write(BOM_UTF8) response.write(b'<data>\n') yield XMLWriter(response, [f['id'] for f in fields]) response.write(b'</data>\n') @@ -194,12 +168,12 @@ def _insert_node(self, root, k, v, key_attr=None): if key_attr is not None: element.attrib[self._key_attr] = unicode(key_attr) - def writerow(self, row): - root = Element(u'row') - if self.id_col: - root.attrib[u'_id'] = unicode(row[0]) - row = row[1:] - for k, v in zip(self.columns, row): - self._insert_node(root, k, v) - ElementTree(root).write(self.response, encoding=u'utf-8') - self.response.write(b'\n') + def write_records(self, records): + for r in records: + root = Element(u'row') + if self.id_col: + root.attrib[u'_id'] = unicode(r[u'_id']) + for c in self.columns: + self._insert_node(root, c, r[c]) + ElementTree(root).write(self.response, encoding=u'utf-8') + self.response.write(b'\n') diff --git a/contrib/docker/docker-compose.yml b/contrib/docker/docker-compose.yml index 78ca3a948fd..51ffd3bbc56 100644 --- a/contrib/docker/docker-compose.yml +++ b/contrib/docker/docker-compose.yml @@ -12,17 +12,11 @@ ckan: db: container_name: db image: ckan/postgresql:latest - ports: - - "5432:5432" solr: container_name: solr image: ckan/solr:latest - ports: - - "8983:8983" redis: container_name: redis image: redis:latest - ports: - - "6379:6379" diff --git a/doc/conf.py b/doc/conf.py index 3ed57700fbb..7a5692d42b8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -52,7 +52,7 @@ .. |storage_path| replace:: |storage_parent_dir|/default .. |reload_apache| replace:: sudo service apache2 reload .. |restart_apache| replace:: sudo service apache2 restart -.. |restart_solr| replace:: sudo service jetty restart +.. |restart_solr| replace:: sudo service jetty8 restart .. |solr| replace:: Solr .. |restructuredtext| replace:: reStructuredText .. |nginx| replace:: Nginx diff --git a/doc/maintaining/datastore.rst b/doc/maintaining/datastore.rst index b1b1790d314..9fd0f995d22 100644 --- a/doc/maintaining/datastore.rst +++ b/doc/maintaining/datastore.rst @@ -245,6 +245,10 @@ Data can be written incrementally to the DataStore through the API. New data can be inserted, existing data can be updated or deleted. You can also add a new column to an existing table even if the DataStore resource already contains some data. +Triggers may be added to enforce validation, clean data as it is loaded, or +even keep a history of changes to records. Triggers are PL/pgSQL functions +that must be created by a sysadmin. + You will notice that we tried to keep the layer between the underlying PostgreSQL database and the API as thin as possible to allow you to use the features you would expect from a powerful database management system. 
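For reference, the trigger workflow described in the documentation above maps onto two of the new API actions. The following is a minimal sketch distilled from the new tests in this changeset (the ``spamonly_trigger`` name and the ``spam`` field are illustrative, and ``helpers.call_action`` supplies the sysadmin context that ``datastore_function_create`` requires)::

    import ckan.tests.factories as factories
    import ckan.tests.helpers as helpers

    ds = factories.Dataset()

    # Define a PL/pgSQL trigger function (sysadmin only) that rejects
    # any row whose "spam" column is not the literal text 'spam'.
    helpers.call_action(
        u'datastore_function_create',
        name=u'spamonly_trigger',
        rettype=u'trigger',
        definition=u'''
            BEGIN
                IF NEW.spam != 'spam' THEN
                    RAISE EXCEPTION '"%"? Yeeeeccch!', NEW.spam;
                END IF;
                RETURN NEW;
            END;''')

    # Attach the function when creating the DataStore table: every
    # insert or update now runs the trigger, and a raised exception
    # is reported back to the API caller as a ValidationError.
    helpers.call_action(
        u'datastore_create',
        resource={u'package_id': ds['id']},
        fields=[{u'id': u'spam', u'type': u'text'}],
        triggers=[{u'function': u'spamonly_trigger'}])

A record that violates the trigger (for example ``{u'spam': u'BEANS'}``) is rejected, and the PL/pgSQL message is returned under the ``records`` key, as the new ``test_upsert_trigger_exception`` test above demonstrates. 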
diff --git a/doc/maintaining/installing/install-from-source.rst b/doc/maintaining/installing/install-from-source.rst index c022946a9e7..25b116b8a94 100644 --- a/doc/maintaining/installing/install-from-source.rst +++ b/doc/maintaining/installing/install-from-source.rst @@ -7,8 +7,8 @@ Installing CKAN from source This section describes how to install CKAN from source. Although :doc:`install-from-package` is simpler, it requires Ubuntu 14.04 64-bit or Ubuntu 12.04 64-bit. Installing CKAN from source works with other versions of -Ubuntu and with other operating systems (e.g. RedHat, Fedora, CentOS, OS X). -If you install CKAN from source on your own operating system, please share your +Ubuntu and with other operating systems (e.g. RedHat, Fedora, CentOS, OS X). If +you install CKAN from source on your own operating system, please share your experiences on our `How to Install CKAN `_ wiki page. @@ -21,7 +21,11 @@ work on CKAN. -------------------------------- If you're using a Debian-based operating system (such as Ubuntu) install the -required packages with this command:: +required packages with this command for Ubuntu 16.04:: + + sudo apt-get install python-dev postgresql libpq-dev python-pip python-virtualenv git-core solr-jetty openjdk-8-jdk redis-server + +or for Ubuntu 14.04:: sudo apt-get install python-dev postgresql libpq-dev python-pip python-virtualenv git-core solr-jetty openjdk-6-jdk redis-server @@ -41,7 +45,7 @@ virtualenv `The virtual Python environment builder `_ Apache Solr `A search platform `_ Jetty `An HTTP server `_ (used for Solr). -OpenJDK 6 JDK `The Java Development Kit `_ +OpenJDK JDK `The Java Development Kit `_ (used by Jetty) Redis `An in-memory data structure store `_ ===================== =============================================== @@ -94,7 +98,13 @@ a. Create a Python `virtual environment `_ |activate| -b. Install the CKAN source code into your virtualenv. +b. Install the recommended version of 'setuptools': + + .. parsed-literal:: + + pip install -r |virtualenv|/src/ckan/requirement-setuptools.txt + +c. Install the CKAN source code into your virtualenv. To install the latest stable release of CKAN (CKAN |latest_release_version|), run: @@ -116,7 +126,7 @@ b. Install the CKAN source code into your virtualenv. production websites! Only install this version if you're doing CKAN development. -c. Install the Python modules that CKAN requires into your virtualenv: +d. Install the Python modules that CKAN requires into your virtualenv: .. versionchanged:: 2.1 In CKAN 2.0 and earlier the requirement file was called @@ -126,7 +136,7 @@ c. Install the Python modules that CKAN requires into your virtualenv: pip install -r |virtualenv|/src/ckan/requirements.txt -d. Deactivate and reactivate your virtualenv, to make sure you're using the +e. Deactivate and reactivate your virtualenv, to make sure you're using the virtualenv's copies of commands like ``paster`` rather than any system-wide installed copies: @@ -321,9 +331,13 @@ If ``javac`` isn't installed, do:: and then restart Solr: -.. parsed-literal:: +For Ubuntu 16.04:: - |restart_solr| + sudo service jetty8 restart + +or for Ubuntu 14.04:: + + sudo service jetty restart AttributeError: 'module' object has no attribute 'css/main.debug.css' --------------------------------------------------------------------- @@ -331,11 +345,22 @@ AttributeError: 'module' object has no attribute 'css/main.debug.css' This error is likely to show up when `debug` is set to `True`. To fix this error, install frontend dependencies. 
See :doc:`/contributing/frontend/index`. -After installing the dependencies, run `bin/less` and then start paster server +After installing the dependencies, run ``bin/less`` and then start the paster server again. If you do not want to compile CSS, you can also copy the main.css to -main.debug.css to get CKAN running. +main.debug.css to get CKAN running:: cp /usr/lib/ckan/default/src/ckan/ckan/public/base/css/main.css \ /usr/lib/ckan/default/src/ckan/ckan/public/base/css/main.debug.css + +JSP support not configured +-------------------------- + +This is seen occasionally with Jetty and Ubuntu 14.04. It requires a solr-jetty fix:: + + cd /tmp + wget https://launchpad.net/~vshn/+archive/ubuntu/solr/+files/solr-jetty-jsp-fix_1.0.2_all.deb + sudo dpkg -i solr-jetty-jsp-fix_1.0.2_all.deb + sudo service jetty restart + diff --git a/doc/maintaining/installing/solr.rst b/doc/maintaining/installing/solr.rst index 2bd057537c1..5da3851ce85 100644 --- a/doc/maintaining/installing/solr.rst +++ b/doc/maintaining/installing/solr.rst @@ -12,8 +12,8 @@ installed, we need to install and configure Solr. server, but CKAN doesn't require Jetty - you can deploy Solr to another web server, such as Tomcat, if that's convenient on your operating system. -#. Edit the Jetty configuration file (``/etc/default/jetty``) and change the - following variables:: +#. Edit the Jetty configuration file (``/etc/default/jetty8`` or + ``/etc/default/jetty``) and change the following variables:: NO_START=0 # (line 4) JETTY_HOST=127.0.0.1 # (line 16) @@ -26,9 +26,20 @@ installed, we need to install and configure Solr. change it to the relevant host or to 0.0.0.0 (and probably set up your firewall accordingly). - Start the Jetty server:: + Start or restart the Jetty server. - sudo service jetty start + For Ubuntu 16.04:: + + sudo service jetty8 restart + + or for Ubuntu 14.04:: + + sudo service jetty restart + + .. note:: + + Ignore any warning that Jetty wasn't already running: some Ubuntu + distributions choose not to start Jetty on install, and this is harmless. You should now see a welcome page from Solr if you open http://localhost:8983/solr/ in your web browser (replace localhost with @@ -57,11 +68,15 @@ installed, we need to install and configure Solr. Now restart Solr: - .. parsed-literal:: + For Ubuntu 16.04:: + + sudo service jetty8 restart + + or for Ubuntu 14.04:: - |restart_solr| + sudo service jetty restart - and check that Solr is running by opening http://localhost:8983/solr/. + Check that Solr is running by opening http://localhost:8983/solr/. #. 
Finally, change the :ref:`solr_url` setting in your :ref:`config_file` (|production.ini|) to diff --git a/doc/maintaining/upgrading/upgrade-postgres.rst b/doc/maintaining/upgrading/upgrade-postgres.rst index d0c57c3e84a..b43790aaf2c 100644 --- a/doc/maintaining/upgrading/upgrade-postgres.rst +++ b/doc/maintaining/upgrading/upgrade-postgres.rst @@ -212,7 +212,7 @@ Upgrading sudo -u postgres psql --cluster 9.4/main -d |database| -f /usr/share/postgresql/9.4/contrib/postgis-2.1/postgis.sql sudo -u postgres psql --cluster 9.4/main -d |database| -f /usr/share/postgresql/9.4/contrib/postgis-2.1/spatial_ref_sys.sql sudo -u postgres psql --cluster 9.4/main -d |database| -c 'ALTER TABLE geometry_columns OWNER TO ckan_default;' - sudo -u postgres psql --cluster 9.4/main -d |database| -c 'ALTER TABLE geometry_columns OWNER TO ckan_default;' + sudo -u postgres psql --cluster 9.4/main -d |database| -c 'ALTER TABLE spatial_ref_sys OWNER TO ckan_default;' To check if PostGIS was properly installed: diff --git a/requirement-setuptools.txt b/requirement-setuptools.txt new file mode 100644 index 00000000000..44e41169c85 --- /dev/null +++ b/requirement-setuptools.txt @@ -0,0 +1 @@ +setuptools==20.4 \ No newline at end of file diff --git a/requirements.in b/requirements.in index 547b7156473..9acc369a4fa 100644 --- a/requirements.in +++ b/requirements.in @@ -26,6 +26,7 @@ repoze.who==2.3 requests==2.11.1 Routes==1.13 rq==0.6.0 +simplejson==3.10.0 sqlalchemy-migrate==0.10.0 SQLAlchemy==0.9.6 sqlparse==0.2.2 diff --git a/requirements.txt b/requirements.txt index 1995936865b..8851fa81c4e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,7 +45,7 @@ repoze.who==2.3 requests==2.11.1 Routes==1.13 # via pylons rq==0.6.0 -simplejson==3.8.2 # via pylons +simplejson==3.10.0 six==1.10.0 # via bleach, html5lib, pastescript, pyutilib.component.core, sqlalchemy-migrate sqlalchemy-migrate==0.10.0 SQLAlchemy==0.9.6 # via sqlalchemy-migrate diff --git a/setup.py b/setup.py index 273b6d528f7..a973a71eca7 100644 --- a/setup.py +++ b/setup.py @@ -6,15 +6,33 @@ del os.link try: - from setuptools import setup, find_packages + from setuptools import (setup, find_packages, + __version__ as setuptools_version) except ImportError: from ez_setup import use_setuptools use_setuptools() + from setuptools import (setup, find_packages, + __version__ as setuptools_version) from ckan import (__version__, __description__, __long_description__, __license__) +# Compare parsed versions rather than raw strings: a plain string +# comparison is lexicographic and would wrongly reject e.g. 100.0. +from pkg_resources import parse_version +MIN_SETUPTOOLS_VERSION = '20.4' +assert parse_version(setuptools_version) >= parse_version(MIN_SETUPTOOLS_VERSION), \ + ('setuptools version error' + '\nYou need a newer version of setuptools.\n' + 'You have {current}, you need at least {minimum}' + '\nInstall the recommended version:\n' + ' pip install -r requirement-setuptools.txt\n' + 'and then try again to install ckan into your python environment.'.format( + current=setuptools_version, + minimum=MIN_SETUPTOOLS_VERSION + )) + + entry_points = { 'nose.plugins.0.10': [ 'main = ckan.ckan_nose_plugin:CkanNose',