From 4d69dfea9cd44bd573e887f5d18f892c0da8c3b1 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 18:51:51 +0100 Subject: [PATCH 01/16] [#718] Use error codes instead of relying on english error messages in datastore --- ckanext/datastore/db.py | 56 +++++++++++++++++-------------------- ckanext/datastore/plugin.py | 20 +++++++++++++ 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index bc442d9d152..6be6c9fefd0 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -11,7 +11,7 @@ import logging import pprint import sqlalchemy -from sqlalchemy.exc import ProgrammingError, IntegrityError +from sqlalchemy.exc import ProgrammingError, IntegrityError, DBAPIError import psycopg2.extras log = logging.getLogger(__name__) @@ -156,15 +156,8 @@ def _is_valid_pg_type(context, type_name): return True else: connection = context['connection'] - try: - connection.execute('SELECT %s::regtype', type_name) - except ProgrammingError, e: - if 'invalid type name' in str(e) or 'does not exist' in str(e): - return False - else: - raise - else: - return True + return connection.execute('SELECT is_valid_type(%s)', + type_name).first()[0] def _get_type(context, oid): @@ -963,23 +956,24 @@ def create(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if ('duplicate key value violates unique constraint' in str(e) - or 'could not create unique index' in str(e)): + if int(e.orig.pgcode) == 23505: raise ValidationError({ - 'constraints': ['Cannot insert records or create index because of uniqueness constraint'], + 'constraints': ['Cannot insert records or create index because ' + 'of uniqueness constraint'], 'info': { 'details': str(e) } }) - else: - raise - except Exception, e: - trans.rollback() - if 'due to statement timeout' in str(e): + raise + except DBAPIError, e: + if int(e.orig.pgcode) == 57014: raise ValidationError({ 'query': ['Query took too long'] }) raise + except Exception, e: + trans.rollback() + raise finally: context['connection'].close() @@ -1005,22 +999,24 @@ def upsert(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if 'duplicate key value violates unique constraint' in str(e): + if int(e.orig.pgcode) == 23505: raise ValidationError({ - 'constraints': ['Cannot insert records because of uniqueness constraint'], + 'constraints': ['Cannot insert records or create index because ' + 'of uniqueness constraint'], 'info': { 'details': str(e) } }) - else: - raise - except Exception, e: - trans.rollback() - if 'due to statement timeout' in str(e): + raise + except DBAPIError, e: + if int(e.orig.pgcode) == 57014: raise ValidationError({ 'query': ['Query took too long'] }) raise + except Exception, e: + trans.rollback() + raise finally: context['connection'].close() @@ -1079,8 +1075,8 @@ def search(context, data_dict): data_dict['resource_id'])] }) return search_data(context, data_dict) - except Exception, e: - if 'due to statement timeout' in str(e): + except DBAPIError, e: + if int(e.orig.pgcode) == 57014: raise ValidationError({ 'query': ['Search took too long'] }) @@ -1112,10 +1108,10 @@ def search_sql(context, data_dict): 'orig': [str(e.orig)] } }) - except Exception, e: - if 'due to statement timeout' in str(e): + except DBAPIError, e: + if int(e.orig.pgcode) == 57014: raise ValidationError({ - 'query': ['Search took too long'] + 'query': ['Query took too long'] }) raise finally: diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 6f67d4ec64b..ef01e3427b2 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -105,6 +105,7 @@ def new_resource_show(context, data_dict): if not hasattr(resource_show, '_datastore_wrapped'): new_resource_show._datastore_wrapped = True logic._actions['resource_show'] = new_resource_show + self._add_is_valid_type_function() def _is_read_only_database(self): for url in [self.ckan_url, self.write_url, self.read_url]: @@ -205,6 +206,25 @@ def _create_alias_table(self): {'connection_url': pylons.config['ckan.datastore.write_url']}).connect() connection.execute(create_alias_table_sql) + def _add_is_valid_type_function(self): + # syntax_error - may occur if someone provides a keyword as a type + # undefined_object - is raised if the type does not exist + create_func_sql = ''' + CREATE OR REPLACE FUNCTION is_valid_type(v_type text) + RETURNS boolean + AS $$ + BEGIN + PERFORM v_type::regtype; + RETURN true; + EXCEPTION WHEN undefined_object OR syntax_error THEN + RETURN false; + END; + $$ LANGUAGE plpgsql stable; + ''' + connection = db._get_engine(None, + {'connection_url': pylons.config['ckan.datastore.write_url']}).connect() + connection.execute(create_func_sql) + def get_actions(self): actions = {'datastore_create': action.datastore_create, 'datastore_upsert': action.datastore_upsert, From 52f5e78ee55b2f54ded61c97257968321c7814f2 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 26 Mar 2013 22:52:58 +0100 Subject: [PATCH 02/16] [#642] Add spaces to log messages where they are missing --- ckanext/datastore/plugin.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index ef01e3427b2..52f6ef98243 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -43,8 +43,8 @@ def configure(self, config): # that we should ignore the following tests. import sys if sys.argv[0].split('/')[-1] == 'paster' and 'datastore' in sys.argv[1:]: - log.warn('Omitting permission checks because you are ' - 'running paster commands.') + log.warn("Omitting permission checks because you are " + "running paster commands.") return self.ckan_url = self.config['sqlalchemy.url'] @@ -67,11 +67,13 @@ def configure(self, config): self._create_alias_table() else: - log.warn("We detected that CKAN is running on a read only database. " - "Permission checks and the creation of _table_metadata are skipped.") + log.warn("We detected that CKAN is running on a read " + "only database. Permission checks and the creation " + "of _table_metadata are skipped.") else: - log.warn("We detected that you do not use a PostgreSQL database. " - "The DataStore will NOT work and datastore tests will be skipped.") + log.warn("We detected that you do not use a PostgreSQL " + "database. The DataStore will NOT work and datastore " + "tests will be skipped.") ## Do light wrapping around action function to add datastore_active ## to resource dict. Not using IAction extension as this prevents From 3824eb23ccc1f92879c3b75002dd9afa939ecce1 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 27 Mar 2013 12:19:37 +0100 Subject: [PATCH 03/16] [#642] Use has_table_privilege and has_schema_privilege instead of experimental privilege checks. Conflicts: ckanext/datastore/plugin.py --- ckanext/datastore/plugin.py | 67 ++++++++++++++----------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 52f6ef98243..4b8ea588a93 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -61,10 +61,15 @@ def configure(self, config): if not ('debug' in config and config['debug']): self._check_separate_db() if self.legacy_mode: - log.warn('Legacy mode active. The sql search will not be available.') - else: - self._check_read_permissions() - + log.warn("Legacy mode active." + "The sql search will not be available.") + elif not self._read_connection_has_correct_privileges(): + if 'debug' in self.config and self.config['debug']: + log.critical("We have write permissions " + "on the read-only database.") + else: + raise Exception("We have write permissions " + "on the read-only database.") self._create_alias_table() else: log.warn("We detected that CKAN is running on a read " @@ -112,20 +117,11 @@ def new_resource_show(context, data_dict): def _is_read_only_database(self): for url in [self.ckan_url, self.write_url, self.read_url]: connection = db._get_engine(None, - {'connection_url': url}).connect() - trans = connection.begin() - try: - sql = u"CREATE TABLE test_readonly(id INTEGER);" - connection.execute(sql) - except ProgrammingError, e: - if 'permission denied' in str(e) or 'read-only transaction' in str(e): - pass - else: - raise - else: + {'connection_url': url}).connect() + sql = u"SELECT has_schema_privilege('public', 'CREATE')" + is_writable = connection.execute(sql).first()[0] + if is_writable: return False - finally: - trans.rollback() return True def _check_separate_db(self): @@ -136,7 +132,8 @@ def _check_separate_db(self): if not self.legacy_mode: if self.write_url == self.read_url: - raise Exception("The write and read-only database connection url are the same.") + raise Exception("The write and read-only database " + "connection url are the same.") if self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url): raise Exception("The CKAN and datastore database are the same.") @@ -144,44 +141,32 @@ def _check_separate_db(self): def _get_db_from_url(self, url): return url[url.rindex("@"):] - def _check_read_permissions(self): + def _read_connection_has_correct_privileges(self): ''' Check whether the right permissions are set for the read only user. A table is created by the write user to test the read only user. ''' write_connection = db._get_engine(None, {'connection_url': self.write_url}).connect() - write_connection.execute(u"DROP TABLE IF EXISTS public._foo;" - u"CREATE TABLE public._foo (id INTEGER, name VARCHAR)") + write_connection.execute( + u"DROP TABLE IF EXISTS public._foo;", + u"CREATE TABLE public._foo ()") read_connection = db._get_engine(None, {'connection_url': self.read_url}).connect() - statements = [ - u"CREATE TABLE public._bar (id INTEGER, name VARCHAR)", - u"INSERT INTO public._foo VALUES (1, 'okfn')" - ] - try: - for sql in statements: - read_trans = read_connection.begin() - try: - read_connection.execute(sql) - except ProgrammingError, e: - if 'permission denied' not in str(e): - raise - else: - log.info("Connection url {0}".format(self.read_url)) - if 'debug' in self.config and self.config['debug']: - log.critical("We have write permissions on the read-only database.") - else: - raise Exception("We have write permissions on the read-only database.") - finally: - read_trans.rollback() + write_connection.execute(u"CREATE TABLE public._foo ()") + for privilege in ['INSERT', 'UPDATE', 'DELETE']: + sql = u"SELECT has_table_privilege('_foo', '{privilege}')".format(privilege=privilege) + have_privilege = read_connection.execute(sql).first()[0] + if have_privilege: + return False except Exception: raise finally: write_connection.execute("DROP TABLE _foo") + return True def _create_alias_table(self): mapping_sql = ''' From 306f016d1b1803302c0c9b9999fad3b6e14ba92b Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 27 Mar 2013 12:51:38 +0100 Subject: [PATCH 04/16] [#642] Make check functions consistent (return bool instead of raising exceptions) --- ckanext/datastore/plugin.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 4b8ea588a93..10afcfeb192 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -18,9 +18,6 @@ class DatastoreException(Exception): class DatastorePlugin(p.SingletonPlugin): - ''' - Datastore plugin. - ''' p.implements(p.IConfigurable, inherit=True) p.implements(p.IActions) p.implements(p.IAuthFunctions) @@ -59,9 +56,11 @@ def configure(self, config): # Make sure that the right permissions are set # so that no harmful queries can be made if not ('debug' in config and config['debug']): - self._check_separate_db() + if self._same_ckan_and_datastore_db(): + raise Exception("The write and read-only database " + "connection url are the same.") if self.legacy_mode: - log.warn("Legacy mode active." + log.warn("Legacy mode active. " "The sql search will not be available.") elif not self._read_connection_has_correct_privileges(): if 'debug' in self.config and self.config['debug']: @@ -115,6 +114,10 @@ def new_resource_show(context, data_dict): self._add_is_valid_type_function() def _is_read_only_database(self): + ''' + Returns True if no connection has CREATE privileges on the public + schema. This is the case if replication is enabled. + ''' for url in [self.ckan_url, self.write_url, self.read_url]: connection = db._get_engine(None, {'connection_url': url}).connect() @@ -124,26 +127,28 @@ def _is_read_only_database(self): return False return True - def _check_separate_db(self): + def _same_ckan_and_datastore_db(self): ''' Make sure the datastore is on a separate db. Otherwise one could access all internal tables via the api. + + Returns True if the CKAN and DataStore db are the same ''' if not self.legacy_mode: if self.write_url == self.read_url: - raise Exception("The write and read-only database " - "connection url are the same.") + return True if self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url): - raise Exception("The CKAN and datastore database are the same.") + return True + return False def _get_db_from_url(self, url): return url[url.rindex("@"):] def _read_connection_has_correct_privileges(self): ''' - Check whether the right permissions are set for the read only user. + Returns True if the right permissions are set for the read only user. A table is created by the write user to test the read only user. ''' write_connection = db._get_engine(None, @@ -162,8 +167,6 @@ def _read_connection_has_correct_privileges(self): have_privilege = read_connection.execute(sql).first()[0] if have_privilege: return False - except Exception: - raise finally: write_connection.execute("DROP TABLE _foo") return True From 244bc6628f7474549409bb6093f345b5494025f6 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 27 Mar 2013 15:50:59 +0100 Subject: [PATCH 05/16] [#642] Refactor datastore plugin configuration, improve (and fix ;-)) tests --- ckanext/datastore/plugin.py | 25 ++++++++-------- ckanext/datastore/tests/test_configure.py | 35 +++++++++++++++++------ 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 10afcfeb192..9d696181d67 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -56,9 +56,12 @@ def configure(self, config): # Make sure that the right permissions are set # so that no harmful queries can be made if not ('debug' in config and config['debug']): + if self._same_read_and_write_url(): + raise DatastoreException("The write and read-only database " + "connection url are the same.") if self._same_ckan_and_datastore_db(): - raise Exception("The write and read-only database " - "connection url are the same.") + raise DatastoreException("CKAN and DataStore database " + "cannot be the same.") if self.legacy_mode: log.warn("Legacy mode active. " "The sql search will not be available.") @@ -67,8 +70,8 @@ def configure(self, config): log.critical("We have write permissions " "on the read-only database.") else: - raise Exception("We have write permissions " - "on the read-only database.") + raise DatastoreException("We have write permissions " + "on the read-only database.") self._create_alias_table() else: log.warn("We detected that CKAN is running on a read " @@ -129,16 +132,8 @@ def _is_read_only_database(self): def _same_ckan_and_datastore_db(self): ''' - Make sure the datastore is on a separate db. Otherwise one could access - all internal tables via the api. - Returns True if the CKAN and DataStore db are the same ''' - - if not self.legacy_mode: - if self.write_url == self.read_url: - return True - if self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url): return True return False @@ -146,6 +141,12 @@ def _same_ckan_and_datastore_db(self): def _get_db_from_url(self, url): return url[url.rindex("@"):] + def _same_read_and_write_url(self): + # in legacy mode, this test can be ignored + if self.legacy_mode: + return True + return self.write_url == self.read_url + def _read_connection_has_correct_privileges(self): ''' Returns True if the right permissions are set for the read only user. diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index eb96c9bdc49..a5f1dd24d21 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -24,16 +24,33 @@ def test_set_legacy_mode(self): assert self.p.write_url == 'foo' assert self.p.read_url == 'foo' - def test_check_separate_db(self): + def test_check_separate_write_and_read_if_not_legacy(self): + self.p.legacy_mode = True + self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u:pass@localhost/ds' + assert self.p._same_read_and_write_url() + + self.p.legacy_mode = False + + assert not self.p.legacy_mode + + self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u:pass@localhost/ds' + assert self.p._same_read_and_write_url() + + self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u2:pass@localhost/ds' + assert not self.p._same_read_and_write_url() + + def test_same_ckan_and_datastore_db(self): + self.p.write_url = 'postgresql://u:pass@localhost/ckan' + self.p.read_url = 'postgresql://u:pass@localhost/ckan' + self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' + + assert self.p._same_ckan_and_datastore_db() + self.p.write_url = 'postgresql://u:pass@localhost/dt' self.p.read_url = 'postgresql://u:pass@localhost/dt' self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' - self.p.legacy_mode = True - try: - self.p._check_separate_db() - except Exception: - self.fail("_check_separate_db raise Exception unexpectedly!") - - self.p.legacy_mode = False - self.assertRaises(Exception, self.p._check_separate_db) + assert not self.p._same_ckan_and_datastore_db() From 6717440f2efb380e9a1749ca5679ec9eae0e1e06 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 12:41:50 +0100 Subject: [PATCH 06/16] [#642] Fix how the check for separate urls is ignored in legacy mode. I put the check for the legacy mode in this function to make it testable. --- ckanext/datastore/plugin.py | 3 ++- ckanext/datastore/tests/test_configure.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 9d696181d67..c7b7566aad4 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -143,8 +143,9 @@ def _get_db_from_url(self, url): def _same_read_and_write_url(self): # in legacy mode, this test can be ignored + # because both URLs are set to the same url if self.legacy_mode: - return True + return False return self.write_url == self.read_url def _read_connection_has_correct_privileges(self): diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index a5f1dd24d21..09d7253fcec 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -28,7 +28,7 @@ def test_check_separate_write_and_read_if_not_legacy(self): self.p.legacy_mode = True self.p.write_url = 'postgresql://u:pass@localhost/ds' self.p.read_url = 'postgresql://u:pass@localhost/ds' - assert self.p._same_read_and_write_url() + assert not self.p._same_read_and_write_url() self.p.legacy_mode = False From 2ceae033001f6376cac760c999c1ebafce22ac96 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 19:03:05 +0100 Subject: [PATCH 07/16] [#642] Simplify check for debug mode, only create _foo once --- ckanext/datastore/plugin.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index c7b7566aad4..2f90f5e5d95 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -55,7 +55,7 @@ def configure(self, config): if not self._is_read_only_database(): # Make sure that the right permissions are set # so that no harmful queries can be made - if not ('debug' in config and config['debug']): + if self.config.get('debug'): if self._same_read_and_write_url(): raise DatastoreException("The write and read-only database " "connection url are the same.") @@ -66,7 +66,7 @@ def configure(self, config): log.warn("Legacy mode active. " "The sql search will not be available.") elif not self._read_connection_has_correct_privileges(): - if 'debug' in self.config and self.config['debug']: + if self.config.get('debug'): log.critical("We have write permissions " "on the read-only database.") else: @@ -155,22 +155,23 @@ def _read_connection_has_correct_privileges(self): ''' write_connection = db._get_engine(None, {'connection_url': self.write_url}).connect() - write_connection.execute( - u"DROP TABLE IF EXISTS public._foo;", - u"CREATE TABLE public._foo ()") - read_connection = db._get_engine(None, {'connection_url': self.read_url}).connect() + drop_foo_sql = u"DROP TABLE IF EXISTS _foo" + + write_connection.execute(drop_foo_sql) + try: - write_connection.execute(u"CREATE TABLE public._foo ()") + write_connection.execute(u"CREATE TABLE _foo ()") for privilege in ['INSERT', 'UPDATE', 'DELETE']: - sql = u"SELECT has_table_privilege('_foo', '{privilege}')".format(privilege=privilege) + test_privilege_sql = u"SELECT has_table_privilege('_foo', '{privilege}')" + sql = test_privilege_sql.format(privilege=privilege) have_privilege = read_connection.execute(sql).first()[0] if have_privilege: return False finally: - write_connection.execute("DROP TABLE _foo") + write_connection.execute(drop_foo_sql) return True def _create_alias_table(self): From f947fbb8b08776a8c023d1be9a21d71db8f2d20d Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 19:50:25 +0100 Subject: [PATCH 08/16] [#718] Move pg error codes in a separate dictionary --- ckanext/datastore/db.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index 6be6c9fefd0..63b026969f7 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -31,15 +31,21 @@ def __init__(self, error_dict): _type_names = set() _engines = {} +# See http://www.postgresql.org/docs/9.2/static/errcodes-appendix.html +_pg_err_code = { + 'unique_violation': 23505, + 'query_canceled': 57014 +} + _date_formats = ['%Y-%m-%d', - '%Y-%m-%d %H:%M:%S', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%SZ', - '%d/%m/%Y', - '%m/%d/%Y', - '%d-%m-%Y', - '%m-%d-%Y', - ] + '%Y-%m-%d %H:%M:%S', + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%SZ', + '%d/%m/%Y', + '%m/%d/%Y', + '%d-%m-%Y', + '%m-%d-%Y', + ] INSERT = 'insert' UPSERT = 'upsert' UPDATE = 'update' @@ -956,7 +962,7 @@ def create(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if int(e.orig.pgcode) == 23505: + if int(e.orig.pgcode) == _pg_err_code['unique_violation']: raise ValidationError({ 'constraints': ['Cannot insert records or create index because ' 'of uniqueness constraint'], @@ -966,7 +972,7 @@ def create(context, data_dict): }) raise except DBAPIError, e: - if int(e.orig.pgcode) == 57014: + if int(e.orig.pgcode) == _pg_err_code['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) @@ -999,7 +1005,7 @@ def upsert(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if int(e.orig.pgcode) == 23505: + if int(e.orig.pgcode) == _pg_err_code['unique_violation']: raise ValidationError({ 'constraints': ['Cannot insert records or create index because ' 'of uniqueness constraint'], @@ -1009,7 +1015,7 @@ def upsert(context, data_dict): }) raise except DBAPIError, e: - if int(e.orig.pgcode) == 57014: + if int(e.orig.pgcode) == _pg_err_code['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) @@ -1076,7 +1082,7 @@ def search(context, data_dict): }) return search_data(context, data_dict) except DBAPIError, e: - if int(e.orig.pgcode) == 57014: + if int(e.orig.pgcode) == _pg_err_code['query_canceled']: raise ValidationError({ 'query': ['Search took too long'] }) @@ -1109,7 +1115,7 @@ def search_sql(context, data_dict): } }) except DBAPIError, e: - if int(e.orig.pgcode) == 57014: + if int(e.orig.pgcode) == _pg_err_code['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) From 98d1a06245ec1d2e3f9c2e65fc8938c1363d58d3 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 23:23:09 +0100 Subject: [PATCH 09/16] [#718] Try to avoid PL/pgSQL since we cannot guarantee that is is activated --- ckanext/datastore/db.py | 51 ++++++++++++++++++++++--------------- ckanext/datastore/plugin.py | 20 --------------- 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index 63b026969f7..0edca4c51b7 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -32,9 +32,11 @@ def __init__(self, error_dict): _engines = {} # See http://www.postgresql.org/docs/9.2/static/errcodes-appendix.html -_pg_err_code = { +_PG_ERR_CODE = { 'unique_violation': 23505, - 'query_canceled': 57014 + 'query_canceled': 57014, + 'undefined_object': 42704, + 'syntax_error': 42601 } _date_formats = ['%Y-%m-%d', @@ -46,10 +48,10 @@ def __init__(self, error_dict): '%d-%m-%Y', '%m-%d-%Y', ] -INSERT = 'insert' -UPSERT = 'upsert' -UPDATE = 'update' -_methods = [INSERT, UPSERT, UPDATE] + +_INSERT = 'insert' +_UPSERT = 'upsert' +_UPDATE = 'update' def _strip(input): @@ -162,8 +164,15 @@ def _is_valid_pg_type(context, type_name): return True else: connection = context['connection'] - return connection.execute('SELECT is_valid_type(%s)', - type_name).first()[0] + try: + connection.execute('SELECT %s::regtype', type_name) + except ProgrammingError, e: + if int(e.orig.pgcode) in [_PG_ERR_CODE['undefined_object'], + _PG_ERR_CODE['syntax_error']]: + return False + raise + else: + return True def _get_type(context, oid): @@ -520,7 +529,7 @@ def alter_table(context, data_dict): def insert_data(context, data_dict): - data_dict['method'] = INSERT + data_dict['method'] = _INSERT return upsert_data(context, data_dict) @@ -529,9 +538,9 @@ def upsert_data(context, data_dict): if not data_dict.get('records'): return - method = data_dict.get('method', UPSERT) + method = data_dict.get('method', _UPSERT) - if method not in _methods: + if method not in [_INSERT, _UPSERT, _UPDATE]: raise ValidationError({ 'method': [u'"{0}" is not defined'.format(method)] }) @@ -542,7 +551,7 @@ def upsert_data(context, data_dict): sql_columns = ", ".join(['"%s"' % name.replace('%', '%%') for name in field_names] + ['"_full_text"']) - if method == INSERT: + if method == _INSERT: rows = [] for num, record in enumerate(records): _validate_record(record, num, field_names) @@ -565,7 +574,7 @@ def upsert_data(context, data_dict): context['connection'].execute(sql_string, rows) - elif method in [UPDATE, UPSERT]: + elif method in [_UPDATE, _UPSERT]: unique_keys = _get_unique_key(context, data_dict) if len(unique_keys) < 1: raise ValidationError({ @@ -607,7 +616,7 @@ def upsert_data(context, data_dict): full_text = _to_full_text(fields, record) - if method == UPDATE: + if method == _UPDATE: sql_string = u''' UPDATE "{res_id}" SET ({columns}, "_full_text") = ({values}, to_tsvector(%s)) @@ -628,7 +637,7 @@ def upsert_data(context, data_dict): 'key': [u'key "{0}" not found'.format(unique_values)] }) - elif method == UPSERT: + elif method == _UPSERT: sql_string = u''' UPDATE "{res_id}" SET ({columns}, "_full_text") = ({values}, to_tsvector(%s)) @@ -962,7 +971,7 @@ def create(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if int(e.orig.pgcode) == _pg_err_code['unique_violation']: + if int(e.orig.pgcode) == _PG_ERR_CODE['unique_violation']: raise ValidationError({ 'constraints': ['Cannot insert records or create index because ' 'of uniqueness constraint'], @@ -972,7 +981,7 @@ def create(context, data_dict): }) raise except DBAPIError, e: - if int(e.orig.pgcode) == _pg_err_code['query_canceled']: + if int(e.orig.pgcode) == _PG_ERR_CODE['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) @@ -1005,7 +1014,7 @@ def upsert(context, data_dict): trans.commit() return _unrename_json_field(data_dict) except IntegrityError, e: - if int(e.orig.pgcode) == _pg_err_code['unique_violation']: + if int(e.orig.pgcode) == _PG_ERR_CODE['unique_violation']: raise ValidationError({ 'constraints': ['Cannot insert records or create index because ' 'of uniqueness constraint'], @@ -1015,7 +1024,7 @@ def upsert(context, data_dict): }) raise except DBAPIError, e: - if int(e.orig.pgcode) == _pg_err_code['query_canceled']: + if int(e.orig.pgcode) == _PG_ERR_CODE['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) @@ -1082,7 +1091,7 @@ def search(context, data_dict): }) return search_data(context, data_dict) except DBAPIError, e: - if int(e.orig.pgcode) == _pg_err_code['query_canceled']: + if int(e.orig.pgcode) == _PG_ERR_CODE['query_canceled']: raise ValidationError({ 'query': ['Search took too long'] }) @@ -1115,7 +1124,7 @@ def search_sql(context, data_dict): } }) except DBAPIError, e: - if int(e.orig.pgcode) == _pg_err_code['query_canceled']: + if int(e.orig.pgcode) == _PG_ERR_CODE['query_canceled']: raise ValidationError({ 'query': ['Query took too long'] }) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 2f90f5e5d95..d7f8fec5d9a 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -114,7 +114,6 @@ def new_resource_show(context, data_dict): if not hasattr(resource_show, '_datastore_wrapped'): new_resource_show._datastore_wrapped = True logic._actions['resource_show'] = new_resource_show - self._add_is_valid_type_function() def _is_read_only_database(self): ''' @@ -199,25 +198,6 @@ def _create_alias_table(self): {'connection_url': pylons.config['ckan.datastore.write_url']}).connect() connection.execute(create_alias_table_sql) - def _add_is_valid_type_function(self): - # syntax_error - may occur if someone provides a keyword as a type - # undefined_object - is raised if the type does not exist - create_func_sql = ''' - CREATE OR REPLACE FUNCTION is_valid_type(v_type text) - RETURNS boolean - AS $$ - BEGIN - PERFORM v_type::regtype; - RETURN true; - EXCEPTION WHEN undefined_object OR syntax_error THEN - RETURN false; - END; - $$ LANGUAGE plpgsql stable; - ''' - connection = db._get_engine(None, - {'connection_url': pylons.config['ckan.datastore.write_url']}).connect() - connection.execute(create_func_sql) - def get_actions(self): actions = {'datastore_create': action.datastore_create, 'datastore_upsert': action.datastore_upsert, From a97bb5f1f65db4708c1d03f1b5a8c54a25bd96ff Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 28 Mar 2013 23:08:47 +0100 Subject: [PATCH 10/16] [#642] Refactored datastore config to make it easier to understand and easier to test --- ckanext/datastore/plugin.py | 71 ++++++++++++----------- ckanext/datastore/tests/test_configure.py | 56 +++++++++++++----- 2 files changed, 79 insertions(+), 48 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index d7f8fec5d9a..f055878b9d9 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -48,39 +48,30 @@ def configure(self, config): self.write_url = self.config['ckan.datastore.write_url'] if self.legacy_mode: self.read_url = self.write_url + log.warn("Legacy mode active. " + "The sql search will not be available.") else: self.read_url = self.config['ckan.datastore.read_url'] - if model.engine_is_pg(): - if not self._is_read_only_database(): - # Make sure that the right permissions are set - # so that no harmful queries can be made - if self.config.get('debug'): - if self._same_read_and_write_url(): - raise DatastoreException("The write and read-only database " - "connection url are the same.") - if self._same_ckan_and_datastore_db(): - raise DatastoreException("CKAN and DataStore database " - "cannot be the same.") - if self.legacy_mode: - log.warn("Legacy mode active. " - "The sql search will not be available.") - elif not self._read_connection_has_correct_privileges(): - if self.config.get('debug'): - log.critical("We have write permissions " - "on the read-only database.") - else: - raise DatastoreException("We have write permissions " - "on the read-only database.") - self._create_alias_table() - else: - log.warn("We detected that CKAN is running on a read " - "only database. Permission checks and the creation " - "of _table_metadata are skipped.") - else: + if not model.engine_is_pg(): log.warn("We detected that you do not use a PostgreSQL " - "database. The DataStore will NOT work and datastore " + "database. The DataStore will NOT work and DataStore " "tests will be skipped.") + return + + if self._is_read_only_database(): + log.warn("We detected that CKAN is running on a read " + "only database. Permission checks and the creation " + "of _table_metadata are skipped.") + else: + if self.config.get('debug'): + handler = log.critical + else: + def handler(message): + raise DatastoreException(message) + self._check_urls_and_permissions(handler) + + self._create_alias_table() ## Do light wrapping around action function to add datastore_active ## to resource dict. Not using IAction extension as this prevents @@ -115,6 +106,22 @@ def new_resource_show(context, data_dict): new_resource_show._datastore_wrapped = True logic._actions['resource_show'] = new_resource_show + def _check_urls_and_permissions(self, handler): + # Make sure that the right permissions are set + # so that no harmful queries can be made + + if not self.legacy_mode and self._same_read_and_write_url(): + handler("The write and read-only database " + "connection urls are the same.") + + if self._same_ckan_and_datastore_db(): + handler("CKAN and DataStore database " + "cannot be the same.") + + if not self._read_connection_has_correct_privileges(): + handler("We have write permissions " + "on the read-only database.") + def _is_read_only_database(self): ''' Returns True if no connection has CREATE privileges on the public @@ -133,18 +140,12 @@ def _same_ckan_and_datastore_db(self): ''' Returns True if the CKAN and DataStore db are the same ''' - if self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url): - return True - return False + return self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url) def _get_db_from_url(self, url): return url[url.rindex("@"):] def _same_read_and_write_url(self): - # in legacy mode, this test can be ignored - # because both URLs are set to the same url - if self.legacy_mode: - return False return self.write_url == self.read_url def _read_connection_has_correct_privileges(self): diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 09d7253fcec..50a83e71057 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -1,5 +1,7 @@ import unittest +import sqlalchemy + import ckanext.datastore.plugin as plugin @@ -24,16 +26,7 @@ def test_set_legacy_mode(self): assert self.p.write_url == 'foo' assert self.p.read_url == 'foo' - def test_check_separate_write_and_read_if_not_legacy(self): - self.p.legacy_mode = True - self.p.write_url = 'postgresql://u:pass@localhost/ds' - self.p.read_url = 'postgresql://u:pass@localhost/ds' - assert not self.p._same_read_and_write_url() - - self.p.legacy_mode = False - - assert not self.p.legacy_mode - + def test_check_separate_write_and_read_url(self): self.p.write_url = 'postgresql://u:pass@localhost/ds' self.p.read_url = 'postgresql://u:pass@localhost/ds' assert self.p._same_read_and_write_url() @@ -43,14 +36,51 @@ def test_check_separate_write_and_read_if_not_legacy(self): assert not self.p._same_read_and_write_url() def test_same_ckan_and_datastore_db(self): - self.p.write_url = 'postgresql://u:pass@localhost/ckan' - self.p.read_url = 'postgresql://u:pass@localhost/ckan' + self.p.read_url = 'postgresql://u2:pass@localhost/ckan' self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' assert self.p._same_ckan_and_datastore_db() - self.p.write_url = 'postgresql://u:pass@localhost/dt' self.p.read_url = 'postgresql://u:pass@localhost/dt' self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' assert not self.p._same_ckan_and_datastore_db() + + def test_check_urls_and_permissions(self): + self.p.legacy_mode = False + self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' + self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u:pass@localhost/ds' + + def handler(message): + assert 'urls are the same' in message, message + try: + self.p._check_urls_and_permissions(handler) + except sqlalchemy.exc.OperationalError: + pass + else: + assert False + + self.p.ckan_url = 'postgresql://u:pass@localhost/ds' + self.p.legacy_mode = True + + def handler2(message): + assert 'cannot be the same' in message, message + try: + self.p._check_urls_and_permissions(handler2) + except sqlalchemy.exc.OperationalError: + pass + else: + assert False + + self.p.read_url = 'postgresql://u2:pass@localhost/ds' + self.p.legacy_mode = False + + def handler3(message): + assert 'cannot be the same' in message, message + try: + self.p._check_urls_and_permissions(handler3) + except sqlalchemy.exc.OperationalError: + pass + else: + assert False From 38ea5c6572de8c6b923025b8157816dfba92941f Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 29 Mar 2013 12:53:40 +0100 Subject: [PATCH 11/16] [#642] Ignore permission check in legacy mode and improve configuration tests --- ckanext/datastore/plugin.py | 37 +++++----- ckanext/datastore/tests/test_configure.py | 89 +++++++++++++++-------- 2 files changed, 79 insertions(+), 47 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index f055878b9d9..40c163b8c33 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -1,6 +1,5 @@ import logging import pylons -from sqlalchemy.exc import ProgrammingError import ckan.plugins as p import ckanext.datastore.logic.action as action @@ -64,12 +63,7 @@ def configure(self, config): "only database. Permission checks and the creation " "of _table_metadata are skipped.") else: - if self.config.get('debug'): - handler = log.critical - else: - def handler(message): - raise DatastoreException(message) - self._check_urls_and_permissions(handler) + self._check_urls_and_permissions(self._log_or_raise) self._create_alias_table() @@ -106,21 +100,30 @@ def new_resource_show(context, data_dict): new_resource_show._datastore_wrapped = True logic._actions['resource_show'] = new_resource_show - def _check_urls_and_permissions(self, handler): + def _log_or_raise(self, message): + if self.config.get('debug'): + log.critical(message) + else: + raise DatastoreException(message) + + def _check_urls_and_permissions(self, error_handler): # Make sure that the right permissions are set # so that no harmful queries can be made - if not self.legacy_mode and self._same_read_and_write_url(): - handler("The write and read-only database " - "connection urls are the same.") - if self._same_ckan_and_datastore_db(): - handler("CKAN and DataStore database " - "cannot be the same.") + error_handler("CKAN and DataStore database " + "cannot be the same.") + + # in legacy mode, the read and write url are ths same (both write url) + # consequently the same url check and and write privilege check + # don't make sense + if not self.legacy_mode: + if self._same_read_and_write_url(): + error_handler("The write and read-only database " + "connection urls are the same.") - if not self._read_connection_has_correct_privileges(): - handler("We have write permissions " - "on the read-only database.") + if not self._read_connection_has_correct_privileges(): + error_handler("The read-only user has write privileges.") def _is_read_only_database(self): ''' diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 50a83e71057..2ab40412a8f 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -1,9 +1,12 @@ import unittest - -import sqlalchemy +from nose.tools import assert_equal import ckanext.datastore.plugin as plugin +# global variable used for callback tests +msg = '' +called = 0 + class TestTypeGetters(unittest.TestCase): def setUp(self): @@ -38,49 +41,75 @@ def test_check_separate_write_and_read_url(self): def test_same_ckan_and_datastore_db(self): self.p.read_url = 'postgresql://u2:pass@localhost/ckan' self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' - assert self.p._same_ckan_and_datastore_db() self.p.read_url = 'postgresql://u:pass@localhost/dt' self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' - assert not self.p._same_ckan_and_datastore_db() def test_check_urls_and_permissions(self): + global msg + + def handler(message): + global msg, called + msg = message + called += 1 + + def true_privileges_mock(): + return True + + def false_privileges_mock(): + return False + self.p.legacy_mode = False self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' self.p.write_url = 'postgresql://u:pass@localhost/ds' - self.p.read_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u2:pass@localhost/ds' + self.p._read_connection_has_correct_privileges = true_privileges_mock - def handler(message): - assert 'urls are the same' in message, message - try: - self.p._check_urls_and_permissions(handler) - except sqlalchemy.exc.OperationalError: - pass - else: - assert False + # all urls are correct + self.p._check_urls_and_permissions(handler) + assert_equal(msg, '') + assert_equal(called, 0) - self.p.ckan_url = 'postgresql://u:pass@localhost/ds' + # same url for read and write but in legacy mode self.p.legacy_mode = True + self.p.read_url = 'postgresql://u:pass@localhost/ds' + self.p._check_urls_and_permissions(handler) + assert_equal(msg, '') + assert_equal(called, 0) - def handler2(message): - assert 'cannot be the same' in message, message - try: - self.p._check_urls_and_permissions(handler2) - except sqlalchemy.exc.OperationalError: - pass - else: - assert False + # same url for read and write + self.p.legacy_mode = False + self.p._check_urls_and_permissions(handler) + assert 'urls are the same' in msg, msg + assert_equal(called, 1) + # same ckan and ds db + self.p.ckan_url = 'postgresql://u:pass@localhost/ds' self.p.read_url = 'postgresql://u2:pass@localhost/ds' + self.p._check_urls_and_permissions(handler) + assert 'cannot be the same' in msg, msg + assert_equal(called, 2) + + # have write permissions but in legacy mode + self.p.legacy_mode = True + msg = '' + self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' + self.p._read_connection_has_correct_privileges = false_privileges_mock + self.p._check_urls_and_permissions(handler) + assert_equal(msg, '') + assert_equal(called, 2) + + # have write permissions self.p.legacy_mode = False + self.p._check_urls_and_permissions(handler) + assert 'user has write privileges' in msg, msg + assert_equal(called, 3) - def handler3(message): - assert 'cannot be the same' in message, message - try: - self.p._check_urls_and_permissions(handler3) - except sqlalchemy.exc.OperationalError: - pass - else: - assert False + # everything is wrong + self.p.ckan_url = 'postgresql://u:pass@localhost/ds' + self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p.read_url = 'postgresql://u:pass@localhost/ds' + self.p._check_urls_and_permissions(handler) + assert_equal(called, 6) From a4d88b756c90c794f7f3473b319d386182d74ce8 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 1 Apr 2013 19:08:45 +0200 Subject: [PATCH 12/16] [#642] Inject `error_handler` instead of explicitly passing it as an argument, split large test into smaller tests --- ckanext/datastore/plugin.py | 14 +-- ckanext/datastore/tests/test_configure.py | 101 ++++++++++------------ 2 files changed, 52 insertions(+), 63 deletions(-) diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 40c163b8c33..95ddad946bf 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -63,7 +63,7 @@ def configure(self, config): "only database. Permission checks and the creation " "of _table_metadata are skipped.") else: - self._check_urls_and_permissions(self._log_or_raise) + self._check_urls_and_permissions() self._create_alias_table() @@ -106,24 +106,24 @@ def _log_or_raise(self, message): else: raise DatastoreException(message) - def _check_urls_and_permissions(self, error_handler): + def _check_urls_and_permissions(self): # Make sure that the right permissions are set # so that no harmful queries can be made if self._same_ckan_and_datastore_db(): - error_handler("CKAN and DataStore database " - "cannot be the same.") + self._log_or_raise("CKAN and DataStore database " + "cannot be the same.") # in legacy mode, the read and write url are ths same (both write url) # consequently the same url check and and write privilege check # don't make sense if not self.legacy_mode: if self._same_read_and_write_url(): - error_handler("The write and read-only database " - "connection urls are the same.") + self._log_or_raise("The write and read-only database " + "connection urls are the same.") if not self._read_connection_has_correct_privileges(): - error_handler("The read-only user has write privileges.") + self._log_or_raise("The read-only user has write privileges.") def _is_read_only_database(self): ''' diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 2ab40412a8f..8cf8e5e36a2 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -1,14 +1,10 @@ import unittest -from nose.tools import assert_equal +from nose.tools import raises import ckanext.datastore.plugin as plugin -# global variable used for callback tests -msg = '' -called = 0 - -class TestTypeGetters(unittest.TestCase): +class TestConfiguration(unittest.TestCase): def setUp(self): self.p = plugin.DatastorePlugin() @@ -47,69 +43,62 @@ def test_same_ckan_and_datastore_db(self): self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' assert not self.p._same_ckan_and_datastore_db() - def test_check_urls_and_permissions(self): - global msg - - def handler(message): - global msg, called - msg = message - called += 1 - - def true_privileges_mock(): - return True - def false_privileges_mock(): - return False +class TestCheckUrlsAndPermissions(unittest.TestCase): + def setUp(self): + self.p = plugin.DatastorePlugin() self.p.legacy_mode = False + + # initialize URLs self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' self.p.write_url = 'postgresql://u:pass@localhost/ds' self.p.read_url = 'postgresql://u2:pass@localhost/ds' + + # initialize mock for privileges check + def true_privileges_mock(): + return True self.p._read_connection_has_correct_privileges = true_privileges_mock - # all urls are correct - self.p._check_urls_and_permissions(handler) - assert_equal(msg, '') - assert_equal(called, 0) + def raise_datastore_exception(message): + raise plugin.DatastoreException(message) + self.p._log_or_raise = raise_datastore_exception - # same url for read and write but in legacy mode - self.p.legacy_mode = True - self.p.read_url = 'postgresql://u:pass@localhost/ds' - self.p._check_urls_and_permissions(handler) - assert_equal(msg, '') - assert_equal(called, 0) + def test_everything_correct_does_not_raise(self): + self.p._check_urls_and_permissions() - # same url for read and write - self.p.legacy_mode = False - self.p._check_urls_and_permissions(handler) - assert 'urls are the same' in msg, msg - assert_equal(called, 1) + @raises(plugin.DatastoreException) + def test_raises_when_ckan_and_datastore_db_are_the_same(self): + self.p.read_url = 'postgresql://u2:pass@localhost/ckan' + self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' - # same ckan and ds db - self.p.ckan_url = 'postgresql://u:pass@localhost/ds' - self.p.read_url = 'postgresql://u2:pass@localhost/ds' - self.p._check_urls_and_permissions(handler) - assert 'cannot be the same' in msg, msg - assert_equal(called, 2) + self.p._check_urls_and_permissions() - # have write permissions but in legacy mode + @raises(plugin.DatastoreException) + def test_raises_when_same_read_and_write_url(self): + self.p.read_url = 'postgresql://u:pass@localhost/ds' + self.p.write_url = 'postgresql://u:pass@localhost/ds' + + self.p._check_urls_and_permissions() + + def test_same_read_and_write_url_in_legacy_mode(self): + self.p.read_url = 'postgresql://u:pass@localhost/ds' + self.p.write_url = 'postgresql://u:pass@localhost/ds' self.p.legacy_mode = True - msg = '' - self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' + + self.p._check_urls_and_permissions() + + @raises(plugin.DatastoreException) + def test_raises_when_we_have_write_permissions(self): + def false_privileges_mock(): + return False self.p._read_connection_has_correct_privileges = false_privileges_mock - self.p._check_urls_and_permissions(handler) - assert_equal(msg, '') - assert_equal(called, 2) + self.p._check_urls_and_permissions() - # have write permissions - self.p.legacy_mode = False - self.p._check_urls_and_permissions(handler) - assert 'user has write privileges' in msg, msg - assert_equal(called, 3) + def test_have_write_permissions_in_legacy_mode(self): + def false_privileges_mock(): + return False + self.p._read_connection_has_correct_privileges = false_privileges_mock + self.p.legacy_mode = True - # everything is wrong - self.p.ckan_url = 'postgresql://u:pass@localhost/ds' - self.p.write_url = 'postgresql://u:pass@localhost/ds' - self.p.read_url = 'postgresql://u:pass@localhost/ds' - self.p._check_urls_and_permissions(handler) - assert_equal(called, 6) + self.p._check_urls_and_permissions() From d76c6570fe96e64925ff7d847e5a0e561ea5be55 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 1 Apr 2013 21:59:17 +0200 Subject: [PATCH 13/16] [#642] Add plugin loading and unloading. This does not fix the singleton issue but is better anyway. --- ckanext/datastore/tests/test_configure.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 8cf8e5e36a2..2594e988700 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -1,4 +1,5 @@ import unittest +import ckan.plugins as p from nose.tools import raises import ckanext.datastore.plugin as plugin @@ -6,7 +7,10 @@ class TestConfiguration(unittest.TestCase): def setUp(self): - self.p = plugin.DatastorePlugin() + self.p = p.load('datastore') + + def tearDown(self): + p.unload('datastore') def test_legacy_mode_default(self): assert not self.p.legacy_mode @@ -46,7 +50,7 @@ def test_same_ckan_and_datastore_db(self): class TestCheckUrlsAndPermissions(unittest.TestCase): def setUp(self): - self.p = plugin.DatastorePlugin() + self.p = p.load('datastore') self.p.legacy_mode = False @@ -64,6 +68,9 @@ def raise_datastore_exception(message): raise plugin.DatastoreException(message) self.p._log_or_raise = raise_datastore_exception + def tearDown(self): + p.unload('datastore') + def test_everything_correct_does_not_raise(self): self.p._check_urls_and_permissions() From 705c39e45ced0ab79567df357f891ad76753a5a8 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Tue, 2 Apr 2013 23:55:43 +0200 Subject: [PATCH 14/16] [#642] Use single quotes where possible --- ckanext/datastore/db.py | 30 +++++++++++------------ ckanext/datastore/plugin.py | 47 +++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 42 deletions(-) diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py index 0edca4c51b7..fb597d7053e 100644 --- a/ckanext/datastore/db.py +++ b/ckanext/datastore/db.py @@ -65,7 +65,7 @@ def _pluck(field, arr): def _get_list(input, strip=True): - """Transforms a string or list to a list""" + '''Transforms a string or list to a list''' if input is None: return if input == '': @@ -114,7 +114,7 @@ def _validate_int(i, field_name, non_negative=False): def _get_engine(context, data_dict): - 'Get either read or write engine.' + '''Get either read or write engine.''' connection_url = data_dict['connection_url'] engine = _engines.get(connection_url) @@ -181,10 +181,8 @@ def _get_type(context, oid): def _rename_json_field(data_dict): - ''' - rename json type to a corresponding type for the datastore since - pre 9.2 postgres versions do not support native json - ''' + '''Rename json type to a corresponding type for the datastore since + pre 9.2 postgres versions do not support native json''' return _rename_field(data_dict, 'json', 'nested') @@ -201,7 +199,8 @@ def _rename_field(data_dict, term, replace): def _guess_type(field): - 'Simple guess type of field, only allowed are integer, numeric and text' + '''Simple guess type of field, only allowed are + integer, numeric and text''' data_types = set([int, float]) if isinstance(field, (dict, list)): return 'nested' @@ -260,7 +259,7 @@ def json_get_values(obj, current_list=None): def check_fields(context, fields): - 'Check if field types are valid.' + '''Check if field types are valid.''' for field in fields: if field.get('type') and not _is_valid_pg_type(context, field['type']): raise ValidationError({ @@ -289,7 +288,7 @@ def convert(data, type_name): def create_table(context, data_dict): - 'Create table from combination of fields and first row of data.' + '''Create table from combination of fields and first row of data.''' datastore_fields = [ {'id': '_id', 'type': 'serial primary key'}, @@ -339,7 +338,7 @@ def create_table(context, data_dict): def _get_aliases(context, data_dict): - ''' Get a list of aliases for a resource. ''' + '''Get a list of aliases for a resource.''' res_id = data_dict['resource_id'] alias_sql = sqlalchemy.text( u'SELECT name FROM "_table_metadata" WHERE alias_of = :id') @@ -348,8 +347,8 @@ def _get_aliases(context, data_dict): def _get_resources(context, alias): - ''' Get a list of resources for an alias. There could be more than one alias - in a resource_dict. ''' + '''Get a list of resources for an alias. There could be more than one alias + in a resource_dict.''' alias_sql = sqlalchemy.text( u'''SELECT alias_of FROM "_table_metadata" WHERE name = :alias AND alias_of IS NOT NULL''') @@ -708,7 +707,7 @@ def _to_full_text(fields, record): def _where(field_ids, data_dict): - 'Return a SQL WHERE clause from data_dict filters and q' + '''Return a SQL WHERE clause from data_dict filters and q''' filters = data_dict.get('filters', {}) if not isinstance(filters, dict): @@ -794,9 +793,8 @@ def _sort(context, data_dict, field_ids): def _insert_links(data_dict, limit, offset): - ''' Adds link to the next/prev part (same limit, offset=offset+limit) - and the resource page. - ''' + '''Adds link to the next/prev part (same limit, offset=offset+limit) + and the resource page.''' data_dict['_links'] = {} # get the url from the request diff --git a/ckanext/datastore/plugin.py b/ckanext/datastore/plugin.py index 95ddad946bf..50b792f122e 100644 --- a/ckanext/datastore/plugin.py +++ b/ckanext/datastore/plugin.py @@ -39,29 +39,29 @@ def configure(self, config): # that we should ignore the following tests. import sys if sys.argv[0].split('/')[-1] == 'paster' and 'datastore' in sys.argv[1:]: - log.warn("Omitting permission checks because you are " - "running paster commands.") + log.warn('Omitting permission checks because you are ' + 'running paster commands.') return self.ckan_url = self.config['sqlalchemy.url'] self.write_url = self.config['ckan.datastore.write_url'] if self.legacy_mode: self.read_url = self.write_url - log.warn("Legacy mode active. " - "The sql search will not be available.") + log.warn('Legacy mode active. ' + 'The sql search will not be available.') else: self.read_url = self.config['ckan.datastore.read_url'] if not model.engine_is_pg(): - log.warn("We detected that you do not use a PostgreSQL " - "database. The DataStore will NOT work and DataStore " - "tests will be skipped.") + log.warn('We detected that you do not use a PostgreSQL ' + 'database. The DataStore will NOT work and DataStore ' + 'tests will be skipped.') return if self._is_read_only_database(): - log.warn("We detected that CKAN is running on a read " - "only database. Permission checks and the creation " - "of _table_metadata are skipped.") + log.warn('We detected that CKAN is running on a read ' + 'only database. Permission checks and the creation ' + 'of _table_metadata are skipped.') else: self._check_urls_and_permissions() @@ -111,25 +111,23 @@ def _check_urls_and_permissions(self): # so that no harmful queries can be made if self._same_ckan_and_datastore_db(): - self._log_or_raise("CKAN and DataStore database " - "cannot be the same.") + self._log_or_raise('CKAN and DataStore database ' + 'cannot be the same.') # in legacy mode, the read and write url are ths same (both write url) # consequently the same url check and and write privilege check # don't make sense if not self.legacy_mode: if self._same_read_and_write_url(): - self._log_or_raise("The write and read-only database " - "connection urls are the same.") + self._log_or_raise('The write and read-only database ' + 'connection urls are the same.') if not self._read_connection_has_correct_privileges(): - self._log_or_raise("The read-only user has write privileges.") + self._log_or_raise('The read-only user has write privileges.') def _is_read_only_database(self): - ''' - Returns True if no connection has CREATE privileges on the public - schema. This is the case if replication is enabled. - ''' + ''' Returns True if no connection has CREATE privileges on the public + schema. This is the case if replication is enabled.''' for url in [self.ckan_url, self.write_url, self.read_url]: connection = db._get_engine(None, {'connection_url': url}).connect() @@ -140,9 +138,7 @@ def _is_read_only_database(self): return True def _same_ckan_and_datastore_db(self): - ''' - Returns True if the CKAN and DataStore db are the same - ''' + '''Returns True if the CKAN and DataStore db are the same''' return self._get_db_from_url(self.ckan_url) == self._get_db_from_url(self.read_url) def _get_db_from_url(self, url): @@ -152,8 +148,7 @@ def _same_read_and_write_url(self): return self.write_url == self.read_url def _read_connection_has_correct_privileges(self): - ''' - Returns True if the right permissions are set for the read only user. + ''' Returns True if the right permissions are set for the read only user. A table is created by the write user to test the read only user. ''' write_connection = db._get_engine(None, @@ -161,12 +156,12 @@ def _read_connection_has_correct_privileges(self): read_connection = db._get_engine(None, {'connection_url': self.read_url}).connect() - drop_foo_sql = u"DROP TABLE IF EXISTS _foo" + drop_foo_sql = u'DROP TABLE IF EXISTS _foo' write_connection.execute(drop_foo_sql) try: - write_connection.execute(u"CREATE TABLE _foo ()") + write_connection.execute(u'CREATE TABLE _foo ()') for privilege in ['INSERT', 'UPDATE', 'DELETE']: test_privilege_sql = u"SELECT has_table_privilege('_foo', '{privilege}')" sql = test_privilege_sql.format(privilege=privilege) From 3c60da1e540a1258f1c54b1c43f6c221e0edbf6f Mon Sep 17 00:00:00 2001 From: Vitor Baptista Date: Mon, 1 Apr 2013 20:59:19 -0300 Subject: [PATCH 15/16] [#642] Refactor and fix Datastore tests Datastore is a SingletonPlugin, so it doesn't matter if we call plugin.DatastorePlugin() many times: we always end up with the same instance. I've added a workaround that, first, saves and unloads the current datastore instance, then sets: pyutilib.component.core.PluginGlobals.singleton_services()[plugin.DatastorePlugin] = True This will make plugin.DatastorePlugin not be a Singleton anymore, so any subsequent calls to ckan.plugins.load('datastore') will create a new instance. Then, in the next line, we create a new DatastorePlugin instance by loading it, and save it into self.p and pyutilib.component.core.PluginGlobals.singleton_services()[plugin.DatastorePlugin]. This turns DatastorePlugin into a Singleton again, and subsequent calls to ckan.plugins.load('datastore') will return this new instance instead. Then, in the teardown, we unload the current the datastore, which gets rid of our test instance, and put the original datastore back in its place, so the environment before setUp() is the same as after tearDown(). For InvalidUrlsOrPermissionsException, what I wanted was a way to check if _check_urls_and_permissions() failed. I did this by overloading _log_or_raise() with an unique Exception, and checking if it's raised. If so, I guarantee that _log_or_raise() was called. This feels like too much boilerplate, but we don't have a stub/mock library, so we have to write it. Conflicts: ckanext/datastore/tests/test_configure.py --- ckanext/datastore/tests/test_configure.py | 104 +++++++++++----------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 2594e988700..019bb317f48 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -1,22 +1,24 @@ import unittest -import ckan.plugins as p -from nose.tools import raises +import nose.tools +import pyutilib.component.core +import ckan.plugins import ckanext.datastore.plugin as plugin - class TestConfiguration(unittest.TestCase): def setUp(self): - self.p = p.load('datastore') + self._original_plugin = ckan.plugins.unload('datastore') + pyutilib.component.core.PluginGlobals.singleton_services()[plugin.DatastorePlugin] = True + self.p = pyutilib.component.core.PluginGlobals.singleton_services()[plugin.DatastorePlugin] = ckan.plugins.load('datastore') def tearDown(self): - p.unload('datastore') + ckan.plugins.unload('datastore') + pyutilib.component.core.PluginGlobals.singleton_services()[plugin.DatastorePlugin] = self._original_plugin def test_legacy_mode_default(self): assert not self.p.legacy_mode def test_set_legacy_mode(self): - assert not self.p.legacy_mode c = { 'sqlalchemy.url': 'bar', 'ckan.datastore.write_url': 'foo' @@ -47,65 +49,67 @@ def test_same_ckan_and_datastore_db(self): self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' assert not self.p._same_ckan_and_datastore_db() + def test_setup_plugin_for_check_urls_and_permissions_tests_should_leave_the_plugin_in_a_valid_state(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() + self.p._check_urls_and_permissions() # Should be OK -class TestCheckUrlsAndPermissions(unittest.TestCase): - def setUp(self): - self.p = p.load('datastore') + def test_check_urls_and_permissions_requires_different_ckan_and_datastore_dbs(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() - self.p.legacy_mode = False + self.p._same_ckan_and_datastore_db = lambda: False + self.p._check_urls_and_permissions() # Should be OK - # initialize URLs - self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' - self.p.write_url = 'postgresql://u:pass@localhost/ds' - self.p.read_url = 'postgresql://u2:pass@localhost/ds' + self.p._same_ckan_and_datastore_db = lambda: True + nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) - # initialize mock for privileges check - def true_privileges_mock(): - return True - self.p._read_connection_has_correct_privileges = true_privileges_mock + def test_check_urls_and_permissions_requires_different_read_and_write_urls_when_not_in_legacy_mode(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() + self.p.legacy_mode = False - def raise_datastore_exception(message): - raise plugin.DatastoreException(message) - self.p._log_or_raise = raise_datastore_exception + self.p._same_read_and_write_url = lambda: False + self.p._check_urls_and_permissions() # Should be OK - def tearDown(self): - p.unload('datastore') + self.p._same_read_and_write_url = lambda: True + nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) - def test_everything_correct_does_not_raise(self): - self.p._check_urls_and_permissions() + def test_check_urls_and_permissions_doesnt_require_different_read_and_write_urls_when_in_legacy_mode(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() + self.p.legacy_mode = True - @raises(plugin.DatastoreException) - def test_raises_when_ckan_and_datastore_db_are_the_same(self): - self.p.read_url = 'postgresql://u2:pass@localhost/ckan' - self.p.ckan_url = 'postgresql://u:pass@localhost/ckan' + self.p._same_read_and_write_url = lambda: False + self.p._check_urls_and_permissions() # Should be OK - self.p._check_urls_and_permissions() + self.p._same_read_and_write_url = lambda: True + self.p._check_urls_and_permissions() # Should be OK - @raises(plugin.DatastoreException) - def test_raises_when_same_read_and_write_url(self): - self.p.read_url = 'postgresql://u:pass@localhost/ds' - self.p.write_url = 'postgresql://u:pass@localhost/ds' + def test_check_urls_and_permissions_requires_read_connection_with_correct_privileges_when_not_in_legacy_mode(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() + self.p.legacy_mode = False - self.p._check_urls_and_permissions() + self.p._read_connection_has_correct_privileges = lambda: True + self.p._check_urls_and_permissions() # Should be OK - def test_same_read_and_write_url_in_legacy_mode(self): - self.p.read_url = 'postgresql://u:pass@localhost/ds' - self.p.write_url = 'postgresql://u:pass@localhost/ds' + self.p._read_connection_has_correct_privileges = lambda: False + nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) + + def test_check_urls_and_permissions_doesnt_care_about_read_connection_privileges_when_in_legacy_mode(self): + self.setUp_plugin_for_check_urls_and_permissions_tests() self.p.legacy_mode = True - self.p._check_urls_and_permissions() + self.p._read_connection_has_correct_privileges = lambda: True + self.p._check_urls_and_permissions() # Should be OK + + self.p._read_connection_has_correct_privileges = lambda: False + self.p._check_urls_and_permissions() # Should be OK - @raises(plugin.DatastoreException) - def test_raises_when_we_have_write_permissions(self): - def false_privileges_mock(): - return False - self.p._read_connection_has_correct_privileges = false_privileges_mock - self.p._check_urls_and_permissions() + def setUp_plugin_for_check_urls_and_permissions_tests(self): + def _raise_invalid_urls_or_permissions_exception(message): + raise InvalidUrlsOrPermissionsException(message) - def test_have_write_permissions_in_legacy_mode(self): - def false_privileges_mock(): - return False - self.p._read_connection_has_correct_privileges = false_privileges_mock + self.p._same_ckan_and_datastore_db = lambda: False self.p.legacy_mode = True + self.p._same_read_and_write_url = lambda: False + self.p._read_connection_has_correct_privileges = lambda: True + self.p._log_or_raise = _raise_invalid_urls_or_permissions_exception - self.p._check_urls_and_permissions() +class InvalidUrlsOrPermissionsException(Exception): pass From 09b23ddeb60b25aaed97f538c23e411b11292b30 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 3 Apr 2013 00:02:39 +0200 Subject: [PATCH 16/16] [#642] PEP8 --- ckanext/datastore/tests/test_configure.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ckanext/datastore/tests/test_configure.py b/ckanext/datastore/tests/test_configure.py index 019bb317f48..079df3910b1 100644 --- a/ckanext/datastore/tests/test_configure.py +++ b/ckanext/datastore/tests/test_configure.py @@ -5,6 +5,7 @@ import ckan.plugins import ckanext.datastore.plugin as plugin + class TestConfiguration(unittest.TestCase): def setUp(self): self._original_plugin = ckan.plugins.unload('datastore') @@ -51,13 +52,13 @@ def test_same_ckan_and_datastore_db(self): def test_setup_plugin_for_check_urls_and_permissions_tests_should_leave_the_plugin_in_a_valid_state(self): self.setUp_plugin_for_check_urls_and_permissions_tests() - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK def test_check_urls_and_permissions_requires_different_ckan_and_datastore_dbs(self): self.setUp_plugin_for_check_urls_and_permissions_tests() self.p._same_ckan_and_datastore_db = lambda: False - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK self.p._same_ckan_and_datastore_db = lambda: True nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) @@ -67,7 +68,7 @@ def test_check_urls_and_permissions_requires_different_read_and_write_urls_when_ self.p.legacy_mode = False self.p._same_read_and_write_url = lambda: False - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK self.p._same_read_and_write_url = lambda: True nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) @@ -77,17 +78,17 @@ def test_check_urls_and_permissions_doesnt_require_different_read_and_write_urls self.p.legacy_mode = True self.p._same_read_and_write_url = lambda: False - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK self.p._same_read_and_write_url = lambda: True - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK def test_check_urls_and_permissions_requires_read_connection_with_correct_privileges_when_not_in_legacy_mode(self): self.setUp_plugin_for_check_urls_and_permissions_tests() self.p.legacy_mode = False self.p._read_connection_has_correct_privileges = lambda: True - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK self.p._read_connection_has_correct_privileges = lambda: False nose.tools.assert_raises(InvalidUrlsOrPermissionsException, self.p._check_urls_and_permissions) @@ -97,10 +98,10 @@ def test_check_urls_and_permissions_doesnt_care_about_read_connection_privileges self.p.legacy_mode = True self.p._read_connection_has_correct_privileges = lambda: True - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK self.p._read_connection_has_correct_privileges = lambda: False - self.p._check_urls_and_permissions() # Should be OK + self.p._check_urls_and_permissions() # Should be OK def setUp_plugin_for_check_urls_and_permissions_tests(self): def _raise_invalid_urls_or_permissions_exception(message): @@ -112,4 +113,6 @@ def _raise_invalid_urls_or_permissions_exception(message): self.p._read_connection_has_correct_privileges = lambda: True self.p._log_or_raise = _raise_invalid_urls_or_permissions_exception -class InvalidUrlsOrPermissionsException(Exception): pass + +class InvalidUrlsOrPermissionsException(Exception): + pass