From 3a99771a7555fc4f0b16107f67a312f6a761b987 Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 12 Oct 2011 17:58:19 +0100 Subject: [PATCH 01/78] [lib/cli]: Adding -h localhost by default for postgres commands. --- ckan/lib/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 32940696500..50fa24f6248 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -132,7 +132,7 @@ def _get_postgres_cmd(self, command): pg_cmd += ' -U %(db_user)s' % self.db_details if self.db_details.get('db_pass') not in (None, ''): pg_cmd = 'export PGPASSWORD=%(db_pass)s && ' % self.db_details + pg_cmd - if self.db_details.get('db_host') not in (None, '', 'localhost'): + if self.db_details.get('db_host') not in (None, ''): pg_cmd += ' -h %(db_host)s' % self.db_details if self.db_details.get('db_port') not in (None, ''): pg_cmd += ' -p %(db_port)s' % self.db_details From 4c2c1a877617359c988a019f0c5a6c1e90e76bed Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 5 Dec 2011 11:44:17 +0000 Subject: [PATCH 02/78] [search] #1498 Support for multiple SOLR schemas A check is now made on the startup to see if the SOLR server defined in the configuration file is using a compatible version. The schema file will be retrieved from the SOLR server and the version will be extracted and check against a local list of supported versions. If the remote version is not supported, a SearchError exception will be shown. Note that the SOLR schemas files have been moved to:: ckan/config/solr Please refer to `ckan/config/solr/README.txt` for more details. 
--- ckan/config/environment.py | 4 + ckan/config/solr/CHANGELOG.txt | 11 ++ ckan/config/solr/README.txt | 30 ++++ ckan/config/solr/schema-1.2.xml | 162 ++++++++++++++++++ .../{schema.xml => solr/schema-1.3.xml} | 2 +- ckan/lib/search/__init__.py | 67 +++++++- ckan/tests/lib/solr/schema-no-version.xml | 7 + ckan/tests/lib/solr/schema-wrong-version.xml | 7 + ckan/tests/lib/test_solr_schema_version.py | 58 +++++++ 9 files changed, 343 insertions(+), 5 deletions(-) create mode 100644 ckan/config/solr/CHANGELOG.txt create mode 100644 ckan/config/solr/README.txt create mode 100644 ckan/config/solr/schema-1.2.xml rename ckan/config/{schema.xml => solr/schema-1.3.xml} (99%) create mode 100644 ckan/tests/lib/solr/schema-no-version.xml create mode 100644 ckan/tests/lib/solr/schema-wrong-version.xml create mode 100644 ckan/tests/lib/test_solr_schema_version.py diff --git a/ckan/config/environment.py b/ckan/config/environment.py index cc1a82ab2c1..2b30047f5d4 100644 --- a/ckan/config/environment.py +++ b/ckan/config/environment.py @@ -78,6 +78,10 @@ def find_controller(self, controller): 'ckan.site_id for SOLR search-index rebuild to work.' 
config['ckan.site_id'] = ckan_host + # Check if SOLR schema is compatible + from ckan.lib.search import check_solr_schema_version + check_solr_schema_version() + config['routes.map'] = make_map() config['pylons.app_globals'] = app_globals.Globals() config['pylons.h'] = ckan.lib.helpers diff --git a/ckan/config/solr/CHANGELOG.txt b/ckan/config/solr/CHANGELOG.txt new file mode 100644 index 00000000000..2b62a9157a3 --- /dev/null +++ b/ckan/config/solr/CHANGELOG.txt @@ -0,0 +1,11 @@ +CKAN SOLR schemas changelog +=========================== + +v1.3 - (ckan>=1.5.2) +-------------------- +* Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430) +* Store extras (#1455) + +v1.2 - (ckan<=1.5.1) +-------------------- +* Original version diff --git a/ckan/config/solr/README.txt b/ckan/config/solr/README.txt new file mode 100644 index 00000000000..9a3bccebfac --- /dev/null +++ b/ckan/config/solr/README.txt @@ -0,0 +1,30 @@ +CKAN SOLR schemas +================= + +This folder contains the latest and previous versions of the SOLR XML +schema files used by CKAN. These can be used on the SOLR server to +override the default SOLR schema. Please note that not all schemas are +backwards compatible with old CKAN versions. Check the CHANGELOG.txt file +in this same folder to check which version of the schema you should use +depending on the CKAN version you are using. + +Developers, when pushing changes to the SOLR schema: + +* Note that updates on the schema are only release based, i.e. all changes + in the schema between releases will be part of the same new version of + the schema. + +* Name the new version of the file using the following convention:: + + schema-<version>.xml + +* Update the `version` attribute of the `schema` tag in the new file:: + + <schema name="ckan" version="<version>"> + +* Update the SUPPORTED_SCHEMA_VERSIONS list in `ckan/lib/search/__init__.py` + Consider if the changes introduced are or are not compatible with + previous schema versions. 
+ +* Update the CHANGELOG.txt file with the new version, the CKAN version + required and changes made to the schema. diff --git a/ckan/config/solr/schema-1.2.xml b/ckan/config/solr/schema-1.2.xml new file mode 100644 index 00000000000..2fb41dd96a1 --- /dev/null +++ b/ckan/config/solr/schema-1.2.xml @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +id +text + + + + + + + + + + + + + + + + + + + + diff --git a/ckan/config/schema.xml b/ckan/config/solr/schema-1.3.xml similarity index 99% rename from ckan/config/schema.xml rename to ckan/config/solr/schema-1.3.xml index c337aee3ba2..f83eacd2f38 100644 --- a/ckan/config/schema.xml +++ b/ckan/config/solr/schema-1.3.xml @@ -16,7 +16,7 @@ limitations under the License. --> - + diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 0d03f60b0b2..e3a57df19c0 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -15,6 +15,8 @@ SIMPLE_SEARCH = config.get('ckan.simple_search', False) +SUPPORTED_SCHEMA_VERSIONS = ['1.3'] + DEFAULT_OPTIONS = { 'limit': 20, 'offset': 0, @@ -37,6 +39,8 @@ 'package': PackageSearchQuery } +SOLR_SCHEMA_FILE_OFFSET = '/admin/file/?file=schema.xml' + if SIMPLE_SEARCH: import sql as sql _INDICES['package'] = NoopSearchIndex @@ -83,7 +87,7 @@ def dispatch_by_operation(entity_type, entity, operation): # we really need to know about any exceptions, so reraise # (see #1172) raise - + class SynchronousSearchPlugin(SingletonPlugin): """Update the search index automatically.""" @@ -94,7 +98,7 @@ def notify(self, entity, operation): return if operation != DomainObjectOperation.deleted: dispatch_by_operation( - entity.__class__.__name__, + entity.__class__.__name__, get_action('package_show_rest')( {'model': model, 'ignore_auth': True}, {'id': entity.id} @@ -102,7 +106,7 @@ def 
notify(self, entity, operation): operation ) elif operation == DomainObjectOperation.deleted: - dispatch_by_operation(entity.__class__.__name__, + dispatch_by_operation(entity.__class__.__name__, {'id': entity.id}, operation) else: log.warn("Discarded Sync. indexing for: %s" % entity) @@ -112,7 +116,7 @@ def rebuild(package=None): log.debug("Rebuilding search index...") package_index = index_for(model.Package) - + if package: pkg_dict = get_action('package_show_rest')( {'model': model, 'ignore_auth': True}, @@ -157,3 +161,58 @@ def clear(): log.debug("Clearing search index...") package_index = index_for(model.Package) package_index.clear() + + +def check_solr_schema_version(schema_file=None): + ''' + Checks if the schema version of the SOLR server is compatible + with this CKAN version. + + The schema will be retrieved from the SOLR server, using the + offset defined in SOLR_SCHEMA_FILE_OFFSET + ('/admin/file/?file=schema.xml'). The schema_file parameter + allows to override this pointing to different schema file, but + it should only be used for testing purposes. + + If the CKAN instance is configured to not use SOLR or the SOLR + server is not available, the function will return False, as the + version check does not apply. If the SOLR server is available, + a SearchError exception will be thrown if the version could not + be extracted or it is not included in the supported versions list. + + :schema_file: Absolute path to an alternative schema file. 
Should + be only used for testing purposes (Default is None) + ''' + + if SIMPLE_SEARCH: + # Not using the SOLR search backend + return False + + if not is_available(): + # Something is wrong with the SOLR server + log.warn('Problems were found while connecting to the SOLR server') + return False + + # Try to get the schema XML file to extract the version + if not schema_file: + solr_url = config.get('solr_url', DEFAULT_SOLR_URL) + url = solr_url.strip('/') + SOLR_SCHEMA_FILE_OFFSET + else: + url = 'file://%s' % schema_file + + import urllib2 + from lxml import etree + + res = urllib2.urlopen(url) + + tree = etree.fromstring(res.read()) + + version = tree.xpath('//schema/@version') + if not len(version): + raise SearchError('Could not extract version info from the SOLR schema, using file: \n%s' % url) + version = version[0] + + if not version in SUPPORTED_SCHEMA_VERSIONS: + raise SearchError('SOLR schema version not supported: %s. Supported versions are [%s]' + % (version,', '.join(SUPPORTED_SCHEMA_VERSIONS))) + return True diff --git a/ckan/tests/lib/solr/schema-no-version.xml b/ckan/tests/lib/solr/schema-no-version.xml new file mode 100644 index 00000000000..6ea81a0fa36 --- /dev/null +++ b/ckan/tests/lib/solr/schema-no-version.xml @@ -0,0 +1,7 @@ + + + + + diff --git a/ckan/tests/lib/solr/schema-wrong-version.xml b/ckan/tests/lib/solr/schema-wrong-version.xml new file mode 100644 index 00000000000..0daed9dbd4c --- /dev/null +++ b/ckan/tests/lib/solr/schema-wrong-version.xml @@ -0,0 +1,7 @@ + + + + + diff --git a/ckan/tests/lib/test_solr_schema_version.py b/ckan/tests/lib/test_solr_schema_version.py new file mode 100644 index 00000000000..2dbc06f5bc7 --- /dev/null +++ b/ckan/tests/lib/test_solr_schema_version.py @@ -0,0 +1,58 @@ +import os +from pylons import config +from ckan.tests import TestController + +class TestSolrSchemaVersionCheck(TestController): + + @classmethod + def setup_class(cls): + + cls.root_dir = os.path.dirname(os.path.realpath(__file__)) + + 
def _get_current_schema(self): + + from ckan.lib.search import SUPPORTED_SCHEMA_VERSIONS + + current_version = sorted(SUPPORTED_SCHEMA_VERSIONS).pop() + + current_schema = os.path.join(self.root_dir,'..','..','config','solr','schema-%s.xml' % current_version) + + return current_schema + + def test_current_schema_exists(self): + + current_schema = self._get_current_schema() + + assert os.path.exists(current_schema) + + + def test_solr_schema_version_check(self): + + from ckan.lib.search import check_solr_schema_version, SearchError + + schema_file = self._get_current_schema() + + # Check that current schema version schema is supported + assert check_solr_schema_version(schema_file) + + # An exception is thrown if version could not be extracted + try: + schema_file = os.path.join(self.root_dir,'solr','schema-no-version.xml') + check_solr_schema_version(schema_file) + + #Should not happen + assert False + except SearchError,e: + assert 'Could not extract version info' in str(e) + + # An exception is thrown if the schema version is not supported + try: + schema_file = os.path.join(self.root_dir,'solr','schema-wrong-version.xml') + check_solr_schema_version(schema_file) + + #Should not happen + assert False + except SearchError,e: + assert 'SOLR schema version not supported' in str(e) + + From b471000db4b1b2f1e666eb9e09b0f76c2692e585 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 5 Dec 2011 12:08:16 +0000 Subject: [PATCH 03/78] Merge branch 'defect-1430-mixed-docs-in-search-index' into feature-1498-multiple-schemas --- ckan/config/solr/schema-1.3.xml | 3 ++- ckan/lib/search/index.py | 4 ++++ ckan/tests/lib/test_solr_search_index.py | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ckan/config/solr/schema-1.3.xml b/ckan/config/solr/schema-1.3.xml index f83eacd2f38..f0ce00ded62 100644 --- a/ckan/config/solr/schema-1.3.xml +++ b/ckan/config/solr/schema-1.3.xml @@ -91,6 +91,7 @@ + @@ -138,7 +139,7 @@ -id +index_id text diff --git 
a/ckan/lib/search/index.py b/ckan/lib/search/index.py index d62f9b015b7..e0969ae4107 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -126,6 +126,10 @@ def index_package(self, pkg_dict): # mark this CKAN instance as data source: pkg_dict['site_id'] = config.get('ckan.site_id') + # add a unique index_id to avoid conflicts + import hashlib + pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest() + # send to solr: try: conn.add_many([pkg_dict]) diff --git a/ckan/tests/lib/test_solr_search_index.py b/ckan/tests/lib/test_solr_search_index.py index dd59f4dc5cd..18435a69e94 100644 --- a/ckan/tests/lib/test_solr_search_index.py +++ b/ckan/tests/lib/test_solr_search_index.py @@ -1,3 +1,4 @@ +import hashlib import socket import solr from pylons import config @@ -47,6 +48,9 @@ def teardown_class(cls): def teardown(self): # clear the search index after every test search.index_for('Package').clear() + + def _get_index_id(self,pkg_id): + return hashlib.md5('%s%s' % (pkg_id,config['ckan.site_id'])).hexdigest() def test_index(self): pkg_dict = { @@ -57,6 +61,7 @@ def test_index(self): search.dispatch_by_operation('Package', pkg_dict, 'new') response = self.solr.query('title:penguin', fq=self.fq) assert len(response) == 1, len(response) + assert response.results[0]['index_id'] == self._get_index_id (pkg_dict['id']) assert response.results[0]['title'] == 'penguin' def test_no_state_not_indexed(self): From cedfb917978b08a74a13283a3f9a626cd7570012 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 5 Dec 2011 15:22:21 +0000 Subject: [PATCH 04/78] [search] Index creatiaon and modification date As the SOLR schema needs to be modified for the new release, we might as well introduce this small change that will allow the long overdue ticket #191 (Search by modification date). 
--- ckan/config/solr/CHANGELOG.txt | 1 + ckan/config/solr/schema-1.3.xml | 11 +++++++---- ckan/lib/search/index.py | 8 +++++++- ckan/tests/lib/test_solr_search_index.py | 16 ++++++++++++++-- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/ckan/config/solr/CHANGELOG.txt b/ckan/config/solr/CHANGELOG.txt index 2b62a9157a3..860f3f0729a 100644 --- a/ckan/config/solr/CHANGELOG.txt +++ b/ckan/config/solr/CHANGELOG.txt @@ -5,6 +5,7 @@ v1.3 - (ckan>=1.5.2) -------------------- * Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430) * Store extras (#1455) +* Store dataset creation and modification date (#191) v1.2 - (ckan<=1.5.1) -------------------- diff --git a/ckan/config/solr/schema-1.3.xml b/ckan/config/solr/schema-1.3.xml index f0ce00ded62..bc22441ad7f 100644 --- a/ckan/config/solr/schema-1.3.xml +++ b/ckan/config/solr/schema-1.3.xml @@ -91,15 +91,15 @@ - - - + + + - + @@ -133,6 +133,9 @@ + + + diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index e0969ae4107..8ae7390256a 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -122,7 +122,13 @@ def index_package(self, pkg_dict): pkg_dict[TYPE_FIELD] = PACKAGE_TYPE pkg_dict = dict([(k.encode('ascii', 'ignore'), v) for (k, v) in pkg_dict.items()]) - + + # modify dates (SOLR is quite picky with dates, and only accepts ISO dates + # with UTC time (i.e trailing Z) + # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html + pkg_dict['metadata_created'] += 'Z' + pkg_dict['metadata_modified'] += 'Z' + # mark this CKAN instance as data source: pkg_dict['site_id'] = config.get('ckan.site_id') diff --git a/ckan/tests/lib/test_solr_search_index.py b/ckan/tests/lib/test_solr_search_index.py index 18435a69e94..176e2df6c42 100644 --- a/ckan/tests/lib/test_solr_search_index.py +++ b/ckan/tests/lib/test_solr_search_index.py @@ -1,3 +1,4 @@ +from datetime import datetime import hashlib import socket import solr @@ -53,16 +54,25 @@ def 
_get_index_id(self,pkg_id): return hashlib.md5('%s%s' % (pkg_id,config['ckan.site_id'])).hexdigest() def test_index(self): + + datetime_now = datetime.now() pkg_dict = { 'id': u'penguin-id', 'title': u'penguin', - 'state': u'active' + 'state': u'active', + 'metadata_created': datetime_now.isoformat(), + 'metadata_modified': datetime_now.isoformat(), } search.dispatch_by_operation('Package', pkg_dict, 'new') response = self.solr.query('title:penguin', fq=self.fq) assert len(response) == 1, len(response) assert response.results[0]['index_id'] == self._get_index_id (pkg_dict['id']) assert response.results[0]['title'] == 'penguin' + + # looks like solrpy messes with microseconds and time zones, + # so ignore them for testing + assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_created'].strftime('%Y-%m-%d %H:%M:%S') + assert datetime_now.strftime('%Y-%m-%d %H:%M:%S') == response.results[0]['metadata_modified'].strftime('%Y-%m-%d %H:%M:%S') def test_no_state_not_indexed(self): pkg_dict = { @@ -76,7 +86,9 @@ def test_index_clear(self): pkg_dict = { 'id': u'penguin-id', 'title': u'penguin', - 'state': u'active' + 'state': u'active', + 'metadata_created': datetime.now().isoformat(), + 'metadata_modified': datetime.now().isoformat(), } search.dispatch_by_operation('Package', pkg_dict, 'new') response = self.solr.query('title:penguin', fq=self.fq) From 5784b181392fff43b8490082da1007a3b8e42738 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 5 Dec 2011 16:50:00 +0000 Subject: [PATCH 05/78] [search] Fix versions in schemas CHANGELOG --- ckan/config/solr/CHANGELOG.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/config/solr/CHANGELOG.txt b/ckan/config/solr/CHANGELOG.txt index 860f3f0729a..5fe664fc8f8 100644 --- a/ckan/config/solr/CHANGELOG.txt +++ b/ckan/config/solr/CHANGELOG.txt @@ -1,12 +1,12 @@ CKAN SOLR schemas changelog =========================== -v1.3 - (ckan>=1.5.2) +v1.3 - (ckan>=1.5.1) 
-------------------- * Use the index_id (hash of dataset id + site_id) as uniqueKey (#1430) * Store extras (#1455) * Store dataset creation and modification date (#191) -v1.2 - (ckan<=1.5.1) +v1.2 - (ckan<=1.5) -------------------- * Original version From 0d9f45bd37781bcba3ad36978c967916e970d1bf Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 5 Dec 2011 19:19:24 +0000 Subject: [PATCH 06/78] [doc,search] Update docs with Solr deployment details and conventions --- doc/configuration.rst | 4 +++- doc/index.rst | 1 + doc/install-from-package.rst | 8 +++----- doc/install-from-source.rst | 35 +---------------------------------- 4 files changed, 8 insertions(+), 40 deletions(-) diff --git a/doc/configuration.rst b/doc/configuration.rst index 9d0b38ff68f..45fcaf47d13 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -378,6 +378,8 @@ a single CKAN instance then this can be ignored. .. index:: single: solr_url +.. _solr_url: + solr_url ^^^^^^^^ @@ -385,7 +387,7 @@ Example:: solr_url = http://solr.okfn.org:8983/solr -This configures the Solr server used for search. The SOLR schema must be the one in ``ckan/config/schema.xml``. +This configures the Solr server used for search. The SOLR schema must be one of the ones in ``ckan/config/solr`` (generally the last one). Optionally, ``solr_user`` and ``solr_password`` can also be passed along to specify HTTP Basic authentication details for all Solr requests. diff --git a/doc/index.rst b/doc/index.rst index 5b6863f7276..a5539df13c0 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -18,6 +18,7 @@ Contents: install-from-package install-from-source post-installation + solr-setup theming loading_data paster diff --git a/doc/install-from-package.rst b/doc/install-from-package.rst index 1fff3462316..aadeedf7985 100644 --- a/doc/install-from-package.rst +++ b/doc/install-from-package.rst @@ -642,12 +642,10 @@ You can now proceed to :doc:`post-installation`. 
If you don't do this and you install different versions of the same Python packages into the different pyenvs in ``/var/lib/ckan`` for each instance, there is a chance the CKAN instances might use the wrong package. + + If you want to make sure that your CKAN instances are using different Solr indexes, you can + configure Solr to run in multi-core mode. See :ref:`solr-multi-core` for more details. - The CKAN team have also recently had difficulties with CKAN instances writing - over each other's Solr search indexes. These have been documented in `ticket - #1430 `_. If you run into the same problems - send an email to `ckan-dev `_. - CKAN packaging is well tested and reliable with single instance CKAN installs. Multi-instance support is newer, and whilst we believe will work well, hasn't had the same degree of testing. If you hit any problems with multi-instance diff --git a/doc/install-from-source.rst b/doc/install-from-source.rst index 9c75ed7dc48..6d53bd6ab2f 100644 --- a/doc/install-from-source.rst +++ b/doc/install-from-source.rst @@ -250,21 +250,7 @@ Install the Source 10. Setup Solr. - Edit the jetty config file (/etc/default/jetty by default on Ubuntu), - changing the following: - - :: - - NO_START=0 # (line 4) - JETTY_HOST=127.0.0.1 # (line 15) - JETTY_PORT=8983 # (line 18) - - Then replace Solr's schema.xml file with a symlink to the one in the CKAN source (Note: The path ``~/pyenv/src/ckan/ckan/config/schema.xml`` will probably need to be to be adjusted for your system. Also ensure it is an absolute path.) - - :: - - sudo mv /usr/share/solr/conf/schema.xml /usr/share/solr/conf/schema.xml.bak - sudo ln -s ~/pyenv/src/ckan/ckan/config/schema.xml /usr/share/solr/conf/schema.xml + Set up Solr following the instructions on :ref:`solr-single` or :ref:`solr-multi-core` depending on your needs. 
Set appropriate values for the ``ckan.site_id`` and ``solr_url`` config variables in your CKAN config file: @@ -273,25 +259,6 @@ Install the Source ckan.site_id=my_ckan_instance solr_url=http://127.0.0.1:8983/solr - You should now be able to start Solr: - - :: - - sudo service jetty start - - .. note:: If you get the message `Could not start Jetty servlet engine because no Java Development Kit (JDK) was found.` then you will have to edit /etc/profile and add this line to the end such as this to the end (adjusting the path for your machine's jdk install):: - - JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/ - - Now run:: - - export JAVA_HOME - sudo service jetty start - - - Now you should check Solr is running ok by browsing: http://localhost:8983/solr/ - - For more information on Solr setup and configuration, see the CKAN wiki: http://wiki.ckan.net/Solr_Search 11. Run the CKAN webserver. From 49684a00d565691e67a5db59f07e0a5475da1bfd Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 6 Dec 2011 09:55:17 +0000 Subject: [PATCH 07/78] [doc,search] Add missing doc file --- doc/solr-setup.rst | 189 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 doc/solr-setup.rst diff --git a/doc/solr-setup.rst b/doc/solr-setup.rst new file mode 100644 index 00000000000..acfe65db335 --- /dev/null +++ b/doc/solr-setup.rst @@ -0,0 +1,189 @@ +=============== +Setting up Solr +=============== + +CKAN uses Solr_ as search platform. This document describes different +topics related with the deployment and management of Solr from a CKAN +point of view. + +.. _Solr: http://lucene.apache.org/solr/ + +CKAN uses customized schema files that take into account its specific +search needs. Different versions of the schema file are found in +``ckan/ckan/config/solr`` + +The following instructions apply to Ubuntu 10.04 (Lucid), the supported +platform by the CKAN team. Other versions or distributions may need +slightly different instructions. + +.. 
_solr-single: + +Single Solr instance +-------------------- + +In this case, there will be only one Solr endpoint that uses a single schema file. +This can be useful for a Solr server used by only a single CKAN instance, or +different instances that share the same schema version. + +To install Solr (if you are following the :doc:`install-from-source` or +:doc:`install-from-package` instructions, you already did this):: + + sudo apt-get install solr-jetty + +You'll need to edit the Jetty configuration file (`/etc/default/jetty`) with the +suitable values:: + + NO_START=0 # (line 4) + JETTY_HOST=127.0.0.1 # (line 15) + JETTY_PORT=8983 # (line 18) + +Start the Jetty server:: + + sudo service jetty start + +You should see welcome page from Solr when visiting (replace localhost with your +server address if needed):: + + http://localhost:8983/solr/ + +and the admin site:: + + http://localhost:8983/solr/admin + + +.. note:: If you get the message `Could not start Jetty servlet engine because no Java Development Kit (JDK) was found.` then you will have to edit /etc/profile and add this line to the end such as this to the end (adjusting the path for your machine's jdk install):: + + JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/ + +Now run:: + + export JAVA_HOME + sudo service jetty start + + + +This default setup will use the following locations in your file system: + +* `/usr/share/solr`: Solr home, with a symlink pointing to the configuration dir in `/etc`. +* `/etc/solr/conf`: Solr configuration files. The more important ones are `schema.xml` and `solrconfig.xml`. +* `/var/lib/solr/data/`: This is where the index files are physically stored. + +You will obviously need to replace the default `schema.xml` file with the CKAN one. To do +so, create a symbolic link to the schema file in the config folder. 
Use the latest schema version +supported by the CKAN version you are installing (it will generally be the highest one):: + + sudo mv /etc/solr/conf/schema.xml /etc/solr/conf/schema.xml.bak + sudo ln -s ~/ckan/ckan/config/solr/schema-1.3.xml /etc/solr/conf/schema.xml + +Restart jetty and check that Solr is still working. + + +.. _solr-multi-core: + +Multiple Solr cores +------------------- + +Solr can also be set up to have multiple configurations and indexes on the +same instance. This is specially useful when you want other applications than CKAN +or different CKAN versions to use the same Solr instance. The different cores +will have different paths in the Solr server URL:: + + http://localhost:8983/solr/schema-1.2 # Used by CKAN up to 1.5 + http://localhost:8983/solr/schema-1.3 # Used by CKAN 1.5.1 and upwards + http://localhost:8983/solr/some-other-site # Used by another site + +To set up a multicore Solr instance, repeat the steps on the previous section +to configure a single Solr instance. + +Create a `solr.xml` file in `/usr/share/solr`. This file will list the +different cores, and allows also to define some configuration options. +This is how cores are defined:: + + + + + + + + + + + + +Note that each core has its own data directory. This is really important to +prevent conflicts between cores. + +For each core, we will create a folder with its name in `/usr/share/solr`, +with a symbolic link to a specific configuration folder in `/etc/solr/`. 
+Copy the existing conf directory to the core directory and link it from +the home dir like this:: + + + sudo mkdir /etc/solr/core0 + sudo mv /etc/solr/conf /etc/solr/core0/ + + sudo mkdir /usr/share/solr/core0 + sudo ln -s /etc/solr/core0/conf /usr/share/solr/core0/conf + +Once you have your first core configured, to create new ones, you just need to +add them to the `solr.xml` file and copy the existing configuration dir:: + + sudo mkdir /etc/solr/core1 + sudo cp -R /etc/solr/core0/conf /etc/solr/core1 + + sudo mkdir /usr/share/solr/core1 + sudo ln -s /etc/solr/core1/conf /usr/share/solr/core1/conf + +After configuring the cores, restart Jetty and visit:: + + http://localhost:8983/solr + +You should see a list of links to the admin sites for the different Solr cores. + +**Note**: You should check that the `<dataDir>` directive in the `solrconfig.xml` +file (located in the config dir) points to the correct location. The best thing +to do is use the `dataDir` variable that we defined in `solr.xml` to ensure +that cores are using the right data directory:: + + <dataDir>${dataDir}</dataDir> + + +Handling changes in the CKAN schema +----------------------------------- + +At some point, changes in new CKAN versions will mean modifications in the schema +to support new features or fix defects. These changes won't always be backwards +compatible, so some changes in the Solr servers will need to be performed. + +If a CKAN instance is using a Solr server for itself, the schema can just be updated +on the Solr server and the index rebuilt. But if a Solr server is shared between +different CKAN instances, there may be conflicts if the schema is updated. + +CKAN uses the following conventions for supporting different schemas: + +* If needed, create a new schema file when releasing a new version of CKAN (i.e. if there + are two or more different modifications in the schema file between CKAN releases, + only one new schema file is created). 
+ +* Keep different versions of the Solr schema in the CKAN source, with a naming convention, + `schema-<version>.xml`:: + + ckan/config/solr/schema-1.2.xml + ckan/config/solr/schema-1.3.xml + +* Each new version of the schema file must include its version in the main `<schema>` tag:: + + <schema name="ckan" version="<version>"> + +* Solr servers used by more than one CKAN instance should be configured as multiple cores, + and provide a core for each schema version needed. The cores should be named following the + convention `schema-<version>`, e.g.:: + + http://<solr-server>/solr/schema-1.2/ + http://<solr-server>/solr/schema-1.3/ + +When a new version of the schema becomes available, a new core is created, with a link to the +latest schema.xml file in the CKAN source. That way, CKAN instances that use an older version +of the schema can still point to the core that uses it, while more recent versions can point +to the latest one. When old versions of CKAN are updated, they only need to change their +:ref:`solr_url` setting to point to the suitable Solr core. From 0a95c6b3ad96659a75a158195a50b281dfafd20a Mon Sep 17 00:00:00 2001 From: Tom Rees Date: Tue, 6 Dec 2011 17:48:47 +0000 Subject: [PATCH 08/78] [groups][xs]: Changed navbar text when not logged in. --- ckan/templates/group/layout.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/templates/group/layout.html b/ckan/templates/group/layout.html index 64153aa871e..a5204e7af2c 100644 --- a/ckan/templates/group/layout.html +++ b/ckan/templates/group/layout.html @@ -26,7 +26,8 @@ ${h.subnav_link(c, h.icon('group') + _('List Groups'), controller='group', action='index')}
  • - ${h.subnav_link(c, h.icon('group_add') + _('Add a Group'), controller='group', action='new')} + + ${h.subnav_link(c, h.icon('group_add') + _('Login to Add a Group'), controller='group', action='new')}
  • From acf1b0a08d8dd8bc4d7a66c26bc71b077d4c090a Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 8 Dec 2011 12:16:17 +0000 Subject: [PATCH 09/78] [doc,search] Solr docs: minor change in conventions, add troubleshooting --- doc/solr-setup.rst | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/doc/solr-setup.rst b/doc/solr-setup.rst index acfe65db335..c000723af8b 100644 --- a/doc/solr-setup.rst +++ b/doc/solr-setup.rst @@ -50,7 +50,6 @@ and the admin site:: http://localhost:8983/solr/admin - .. note:: If you get the message `Could not start Jetty servlet engine because no Java Development Kit (JDK) was found.` then you will have to edit /etc/profile and add this line to the end such as this to the end (adjusting the path for your machine's jdk install):: JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64/ @@ -61,7 +60,6 @@ Now run:: sudo service jetty start - This default setup will use the following locations in your file system: * `/usr/share/solr`: Solr home, with a symlink pointing to the configuration dir in `/etc`. @@ -88,8 +86,8 @@ same instance. This is specially useful when you want other applications than CK or different CKAN versions to use the same Solr instance. The different cores will have different paths in the Solr server URL:: - http://localhost:8983/solr/schema-1.2 # Used by CKAN up to 1.5 - http://localhost:8983/solr/schema-1.3 # Used by CKAN 1.5.1 and upwards + http://localhost:8983/solr/ckan-schema-1.2 # Used by CKAN up to 1.5 + http://localhost:8983/solr/ckan-schema-1.3 # Used by CKAN 1.5.1 and upwards http://localhost:8983/solr/some-other-site # Used by another site To set up a multicore Solr instance, repeat the steps on the previous section @@ -147,6 +145,37 @@ that cores are using the right data directory:: ${dataDir} +Troubleshooting +--------------- + +Solr requests and errors are logged in the web server log. 
+ +* For jetty servers, they are located in:: + + /var/log/jetty/.stderrout.log + +* For Tomcat servers, they are located in:: + + /var/log/tomcat6/catalina..log + +Some problems that can be found during the install: + +* When setting up a multi-core Solr instance, no cores are shown when visiting the + Solr index page, and the admin interface returns a 404 error. + + Check the web server error log if you can find an error similar to this one:: + + WARNING: [iatiregistry.org] Solr index directory '/usr/share/solr/iatiregistry.org/data/index' doesn't exist. Creating new index... + 07-Dec-2011 18:06:33 org.apache.solr.common.SolrException log + SEVERE: java.lang.RuntimeException: Cannot create directory: /usr/share/solr/iatiregistry.org/data/index + [...] + + The dataDir is not properly configured. With our setup the data directory should + be under `/var/lib/solr/data`. Make sure that you defined the correct `dataDir` + in the `solr.xml` file and that in the `solrconfig.xml` file you have the + following configuration option:: + + ${dataDir} Handling changes in the CKAN schema ----------------------------------- @@ -179,8 +208,8 @@ CKAN uses the following conventions for supporting different schemas: and provide a core for each schema version needed. The cores should be named following the convention `schema-`, e.g.:: - http:///solr/schema-1.2/ - http:///solr/schema-1.3/ + http:///solr/ckan-schema-1.2/ + http:///solr/ckan-schema-1.3/ When a new version of the schema becomes available, a new core is created, with a link to the latest schema.xml file in the CKAN source. 
That way, CKAN instances that use an older version From 2bc06e3434a6d75c481e5107371b52afd152060c Mon Sep 17 00:00:00 2001 From: amercader Date: Thu, 8 Dec 2011 16:59:02 +0000 Subject: [PATCH 10/78] [search] #1498 Support basic authorization when checking Solr schema version --- ckan/lib/search/__init__.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index e3a57df19c0..30e033e522e 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -184,6 +184,8 @@ def check_solr_schema_version(schema_file=None): be only used for testing purposes (Default is None) ''' + import urllib2 + if SIMPLE_SEARCH: # Not using the SOLR search backend return False @@ -195,16 +197,27 @@ def check_solr_schema_version(schema_file=None): # Try to get the schema XML file to extract the version if not schema_file: + solr_user = config.get('solr_user') + solr_password = config.get('solr_password') + + http_auth = None + if solr_user is not None and solr_password is not None: + http_auth = solr_user + ':' + solr_password + http_auth = 'Basic ' + http_auth.encode('base64').strip() + solr_url = config.get('solr_url', DEFAULT_SOLR_URL) url = solr_url.strip('/') + SOLR_SCHEMA_FILE_OFFSET + + req = urllib2.Request(url = url) + if http_auth: + req.add_header('Authorization',http_auth) + + res = urllib2.urlopen(req) else: url = 'file://%s' % schema_file + res = urllib2.urlopen(url) - import urllib2 from lxml import etree - - res = urllib2.urlopen(url) - tree = etree.fromstring(res.read()) version = tree.xpath('//schema/@version') From e1f68e1c46a7f038b55d12c368aa57a0dd96788e Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 9 Dec 2011 12:25:15 +0000 Subject: [PATCH 11/78] [release] Set version to 1.5.1c. This branch is aiming for CKAN release 1.5.1. (Original branch 1.5.1 went wrong, so this is the corrected branch.) 
--- ckan/__init__.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ckan/__init__.py b/ckan/__init__.py index bf43d695595..016ee448325 100644 --- a/ckan/__init__.py +++ b/ckan/__init__.py @@ -1,17 +1,18 @@ -__version__ = '1.5.1b' +__version__ = '1.5.1c' __description__ = 'Comprehensive Knowledge Archive Network (CKAN) Software' __long_description__ = \ -'''The CKAN software is used to run the Comprehensive Knowledge Archive -Network (CKAN) site: http://www.ckan.net. +'''CKAN software provides a hub for datasets. The flagship site running CKAN +is theDataHub.org but it is also used for dozens of other open data websites +run by governments, agencies and citizens. -The Comprehensive Knowledge Archive Network is a registry of open -knowledge packages and projects (and a few closed ones). CKAN is the -place to search for open knowledge resources as well as register your -own - be that a set of Shakespeare's works, a global population density -database, the voting records of MPs, or 30 years of US patents. +CKAN provides a place to search for open knowledge resources as well as +register your own - be that a set of Shakespeare's works, a global +population density database, the voting records of MPs, or 30 years of +US patents. -Those familiar with freshmeat or CPAN can think of CKAN as providing an -analogous service for open knowledge. +CKAN is an abbreviation for 'Comprehensive Knowledge Archive Network'. +Those familiar with Freshmeat or CPAN can think of CKAN as providing an +analogous service for open data and knowledge. ''' __license__ = 'AGPL' From 57e770ebfd064c3e6c08af0d25b08f236f7cceff Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 1 Dec 2011 16:31:51 +0000 Subject: [PATCH 12/78] [lib,controllers]: #1504 Fix for blank data_dict in action api. 
--- ckan/controllers/api.py | 7 +++++++ ckan/lib/base.py | 7 ++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ckan/controllers/api.py b/ckan/controllers/api.py index 87979aabb7a..276d3a75dfc 100644 --- a/ckan/controllers/api.py +++ b/ckan/controllers/api.py @@ -148,6 +148,13 @@ def action(self, logic_function): log.error('Bad request data: %s' % str(inst)) return self._finish_bad_request( gettext('JSON Error: %s') % str(inst)) + if not isinstance(request_data, dict): + # this occurs if request_data is blank + log.error('Bad request data - not dict: %r' % request_data) + return self._finish_bad_request( + gettext('Bad request data: %s') % \ + 'Request data JSON decoded to %r but ' \ + 'it needs to be a dictionary.' % request_data) try: result = function(context, request_data) return_dict['success'] = True diff --git a/ckan/lib/base.py b/ckan/lib/base.py index 1b4bcf64194..deaf19536d4 100644 --- a/ckan/lib/base.py +++ b/ckan/lib/base.py @@ -182,12 +182,13 @@ def _get_request_data(cls): try: request_data = json.loads(request_data, encoding='utf8') except ValueError, e: - raise ValueError, 'Error parsing JSON data. ' \ + raise ValueError, 'Error decoding JSON data. ' \ 'Error: %r ' \ - 'JSON (Decoded and re-encoded): %r' % \ + 'JSON data extracted from the request: %r' % \ (e, request_data) if not isinstance(request_data, dict): - raise ValueError, "Request params must be in form of a json encoded dictionary." + raise ValueError, 'Request data JSON decoded to %r but ' \ + 'it needs to be a dictionary.' % request_data # ensure unicode values for key, val in request_data.items(): # if val is str then assume it is ascii, since json converts From b43e028ee4051336c211b97e83475a78579fa31a Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 1 Dec 2011 16:32:43 +0000 Subject: [PATCH 13/78] [cli]: Fix for CLI load-only command that was broken in last commit. 
--- ckan/lib/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 8f657362d0e..a0c518845b3 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -105,7 +105,7 @@ def command(self): elif cmd == 'load': self.load(cmd) elif cmd == 'load-only': - self.load(cmd, only_load=True) + self.load(only_load=True) elif cmd == 'simple-dump-csv': self.simple_dump_csv(cmd) elif cmd == 'simple-dump-json': From bb4fd64dba235f144f9681db1092ba9f35c8c02e Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 1 Dec 2011 17:17:18 +0000 Subject: [PATCH 14/78] [controller,tests]: #1505 Fix catching of search errors in package_search Action API. --- ckan/tests/functional/api/test_action.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 601af4542f3..381c5381aa2 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -933,4 +933,3 @@ def test_3_bad_param(self): status=400) assert '"message": "Search Query is invalid:' in res.body, res.body assert '"Invalid search parameters: [u\'weird_param\']' in res.body, res.body - From c790b8268aee1525d1a15ad0610ab6c139bbb7fe Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 1 Dec 2011 17:56:21 +0000 Subject: [PATCH 15/78] [lib,doc][xs]: Slightly improved SOLR search errors. --- ckan/lib/search/query.py | 11 +++++++++-- doc/apiv3.rst | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index 68497007734..9082162eb7a 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -232,6 +232,7 @@ def run(self, query): May raise SearchQueryError or SearchError. 
''' + from solr import SolrException assert isinstance(query, (dict, MultiDict)) # check that query keys are valid if not set(query.keys()) <= VALID_SOLR_PARAMETERS: @@ -281,9 +282,15 @@ def run(self, query): # query['qf'] = query.get('qf', QUERY_FIELDS) conn = make_connection() + log.debug('Package query: %r' % query) + + try: + solr_response = conn.raw_query(**query) + except SolrException, e: + raise SearchError('SOLR returned an error running query: %r Error: %r' % + (query, e.reason)) try: - log.debug('Package query: %r' % query) - data = json.loads(conn.raw_query(**query)) + data = json.loads(solr_response) response = data['response'] self.count = response.get('numFound', 0) self.results = response.get('docs', []) diff --git a/doc/apiv3.rst b/doc/apiv3.rst index d2741cf2da2..92acef984be 100644 --- a/doc/apiv3.rst +++ b/doc/apiv3.rst @@ -370,6 +370,7 @@ These parameters are all the standard SOLR syntax (in contrast to the syntax use +-----------------------+---------------+----------------------------------+----------------------------------+ | sort | field name, || sort=name asc | Changes the sort order according | | | asc / dec | | to the field and direction given.| +| | | | default: score desc, name asc | +-----------------------+---------------+----------------------------------+----------------------------------+ | start, rows | result-int | start=40&rows=20 | Pagination options. Start is the | | | (defaults: | | number of the first result and | From e2402d3ec882f562ef290a0cd9267cc14c547519 Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 1 Dec 2011 18:10:01 +0000 Subject: [PATCH 16/78] [tests][xs]: Fix test broken in ca8a11649. 
--- ckan/tests/functional/api/test_action.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 381c5381aa2..33363cca1f0 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -923,7 +923,8 @@ def test_2_bad_param(self): res = self.app.post('/api/action/package_search', params=postparams, status=409) assert '"message": "Search error:' in res.body, res.body - assert 'Search error: HTTP code=400, Reason=Missing sort order.' in res.body, res.body + assert 'SOLR returned an error' in res.body, res.body + assert 'Missing sort order' in res.body, res.body def test_3_bad_param(self): postparams = '%s=1' % json.dumps({ From ecbff1c8aaf5f36a56b87a808f236763ad49abf7 Mon Sep 17 00:00:00 2001 From: Ian Murray Date: Mon, 21 Nov 2011 09:32:13 +0000 Subject: [PATCH 17/78] [types][sqlite/datetime check] Check whether the engine itself is sqlite Rather than checking the config file. --- ckan/model/meta.py | 5 +++++ ckan/model/types.py | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ckan/model/meta.py b/ckan/model/meta.py index b3da7f231c7..8e48e595a34 100644 --- a/ckan/model/meta.py +++ b/ckan/model/meta.py @@ -114,3 +114,8 @@ def after_rollback(self, session): # names, you'll need a metadata for each database metadata = MetaData() +def engine_is_sqlite(): + """ + Returns true iff the engine is connected to a sqlite database. + """ + return engine.url.drivername == 'sqlite' diff --git a/ckan/model/types.py b/ckan/model/types.py index 9639ccfad34..ff6f904389d 100644 --- a/ckan/model/types.py +++ b/ckan/model/types.py @@ -5,6 +5,8 @@ from pylons import config +from ckan.model import meta + def make_uuid(): return unicode(uuid.uuid4()) @@ -83,7 +85,7 @@ def iso_date_to_datetime_for_sqlite(datetime_or_iso_date_if_sqlite): # to call this to convert it into a datetime type. 
When running on # postgres then you have a datetime anyway, so this function doesn't # do anything. - if config['sqlalchemy.url'].startswith('sqlite:'): + if meta.engine_is_sqlite(): return datetime.datetime.strptime(datetime_or_iso_date_if_sqlite, '%Y-%m-%d %H:%M:%S.%f') else: From 6ae89dbeda489c91652a6f0d606604859bb4a527 Mon Sep 17 00:00:00 2001 From: Ross Jones Date: Mon, 5 Dec 2011 20:15:02 +0000 Subject: [PATCH 18/78] [model][xs] Cope with sqlite on my machine returns a datetime instead of a string. --- ckan/model/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/model/types.py b/ckan/model/types.py index ff6f904389d..bd5e5948d52 100644 --- a/ckan/model/types.py +++ b/ckan/model/types.py @@ -85,7 +85,7 @@ def iso_date_to_datetime_for_sqlite(datetime_or_iso_date_if_sqlite): # to call this to convert it into a datetime type. When running on # postgres then you have a datetime anyway, so this function doesn't # do anything. - if meta.engine_is_sqlite(): + if meta.engine_is_sqlite() and isinstance(datetime_or_iso_date_if_sqlite, basestring): return datetime.datetime.strptime(datetime_or_iso_date_if_sqlite, '%Y-%m-%d %H:%M:%S.%f') else: From 29f6d03946cd12656b1b75b2e17d10873c9ccafc Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 30 Nov 2011 20:10:13 +0000 Subject: [PATCH 19/78] [xs, 1501] Fix group, user, tag pagination broken when map.explicit=True was set (15.11.11) --- ckan/lib/alphabet_paginate.py | 2 +- ckan/lib/create_test_data.py | 15 ++++-- ckan/lib/helpers.py | 13 ++++- ckan/tests/functional/test_pagination.py | 64 ++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 5 deletions(-) create mode 100644 ckan/tests/functional/test_pagination.py diff --git a/ckan/lib/alphabet_paginate.py b/ckan/lib/alphabet_paginate.py index af7939b2436..c467ee57158 100644 --- a/ckan/lib/alphabet_paginate.py +++ b/ckan/lib/alphabet_paginate.py @@ -58,7 +58,7 @@ def pager(self, q=None): letters = [char for char in 
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'] + [self.other_text] for letter in letters: if letter != page: - page = HTML.a(class_='pager_link', href=url_for(page=letter), c=letter) + page = HTML.a(class_='pager_link', href=url_for(controller='tag', action='index', page=letter), c=letter) else: page = HTML.span(class_='pager_curpage', c=letter) pages.append(page) diff --git a/ckan/lib/create_test_data.py b/ckan/lib/create_test_data.py index ef305fe771e..ebb86244bd8 100644 --- a/ckan/lib/create_test_data.py +++ b/ckan/lib/create_test_data.py @@ -110,6 +110,7 @@ def create_arbitrary(cls, package_dicts, relationships=[], model.Session.remove() new_user_names = extra_user_names new_group_names = set() + new_groups = {} rev = model.repo.new_revision() rev.author = cls.author @@ -172,9 +173,17 @@ def create_arbitrary(cls, package_dicts, relationships=[], for group_name in group_names: group = model.Group.by_name(unicode(group_name)) if not group: - group = model.Group(name=unicode(group_name)) - model.Session.add(group) - new_group_names.add(group_name) + if not group_name in new_groups: + group = model.Group(name=unicode(group_name)) + model.Session.add(group) + new_group_names.add(group_name) + new_groups[group_name] = group + else: + # If adding multiple packages with the same group name, + # model.Group.by_name will not find the group as the + # session has not yet been committed at this point. + # Fetch from the new_groups dict instead. 
+ group = new_groups[group_name] pkg.groups.append(group) elif attr == 'license': pkg.license_id = val diff --git a/ckan/lib/helpers.py b/ckan/lib/helpers.py index e15b679d74d..3030cf68ac4 100644 --- a/ckan/lib/helpers.py +++ b/ckan/lib/helpers.py @@ -15,6 +15,7 @@ from webhelpers import paginate from webhelpers.text import truncate import webhelpers.date as date +from pylons import url from pylons.decorators.cache import beaker_cache from routes import url_for, redirect_to from alphabet_paginate import AlphaPage @@ -237,10 +238,20 @@ def gravatar(email_hash, size=100, default="mm"): class Page(paginate.Page): - + + def _page_url(self, page, partial=None, **kwargs): + routes_dict = url.environ['pylons.routes_dict'] + kwargs['controller'] = routes_dict['controller'] + kwargs['action'] = routes_dict['action'] + if routes_dict.get('id'): + kwargs['id'] = routes_dict['id'] + kwargs['page'] = page + return url(**kwargs) + # Curry the pager method of the webhelpers.paginate.Page class, so we have # our custom layout set as default. def pager(self, *args, **kwargs): + self._url_generator = self._page_url kwargs.update( format=u"
    $link_previous ~2~ $link_next
    ", symbol_previous=u'« Prev', symbol_next=u'Next »' diff --git a/ckan/tests/functional/test_pagination.py b/ckan/tests/functional/test_pagination.py new file mode 100644 index 00000000000..0a611772b64 --- /dev/null +++ b/ckan/tests/functional/test_pagination.py @@ -0,0 +1,64 @@ +from ckan.lib.create_test_data import CreateTestData +import ckan.model as model +from ckan.tests import TestController, url_for + +class TestPagination(TestController): + @classmethod + def setup_class(cls): + model.repo.init_db() + + # no. entities per page is hardcoded into the controllers, so + # create enough of each here so that we can test pagination + cls.num_groups = 21 + cls.num_packages_in_large_group = 51 + cls.num_users = 21 + + groups = [u'group_%s' % str(i).zfill(2) for i in range(1, cls.num_groups)] + users = [u'user_%s' % str(i).zfill(2) for i in range(cls.num_users)] + packages = [] + for i in range(cls.num_packages_in_large_group): + packages.append({ + 'name': u'package_%s' % str(i).zfill(2), + 'groups': u'group_00' + }) + + CreateTestData.create_arbitrary( + packages, extra_group_names=groups, extra_user_names = users, + ) + + @classmethod + def teardown_class(self): + model.repo.rebuild_db() + + def test_group_index(self): + res = self.app.get(url_for(controller='group', action='index')) + assert 'href="/group?page=2"' in res + assert 'href="/group/group_19"' in res + + res = self.app.get(url_for(controller='group', action='index', page=2)) + assert 'href="/group?page=1"' in res + assert 'href="/group/group_20"' in res + + def test_group_read(self): + res = self.app.get(url_for(controller='group', action='read', id='group_00')) + assert 'href="/group/group_00?page=2' in res + + # TODO: enable this check when #1502 is fixed + # assert 'href="/dataset/package_49"' in res + + res = self.app.get(url_for(controller='group', action='read', id='group_00', page=2)) + assert 'href="/group/group_00?page=1' in res + + # TODO: enable this check when #1502 is fixed + # assert 
'href="/dataset/package_50"' in res + + def test_users_index(self): + # allow for 2 extra users shown on user listing, 'logged_in' and 'visitor' + res = self.app.get(url_for(controller='user', action='index')) + assert 'href="/user/user_18"' in res + assert 'href="/user?q=&order_by=name&page=2"' in res + + res = self.app.get(url_for(controller='user', action='index', page=2)) + assert 'href="/user/user_20"' in res + assert 'href="/user?q=&order_by=name&page=1"' in res + From 66102b8b70592422f3744a4580dfc25f53e0646f Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 30 Nov 2011 20:48:49 +0000 Subject: [PATCH 20/78] [xs, 1502] Bug fix: list group packages by name --- ckan/logic/action/get.py | 5 +++-- ckan/tests/functional/test_pagination.py | 8 ++------ ckan/tests/functional/test_user.py | 2 ++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 8339039c7b9..394fc722227 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -432,11 +432,12 @@ def group_package_show(context, data_dict): .filter(model.PackageRevision.current==True)\ .join(model.PackageGroup, model.PackageGroup.package_id==model.PackageRevision.id)\ .join(model.Group, model.Group.id==model.PackageGroup.group_id)\ - .filter_by(id=group.id) + .filter_by(id=group.id)\ + .order_by(model.PackageRevision.name) - query = query.order_by(model.package_revision_table.c.revision_timestamp.desc()) if limit: query = query.limit(limit) + pack_rev = query.all() return _package_list_with_resources(context, pack_rev) diff --git a/ckan/tests/functional/test_pagination.py b/ckan/tests/functional/test_pagination.py index 0a611772b64..b87f1fc6555 100644 --- a/ckan/tests/functional/test_pagination.py +++ b/ckan/tests/functional/test_pagination.py @@ -42,15 +42,11 @@ def test_group_index(self): def test_group_read(self): res = self.app.get(url_for(controller='group', action='read', id='group_00')) assert 'href="/group/group_00?page=2' in 
res - - # TODO: enable this check when #1502 is fixed - # assert 'href="/dataset/package_49"' in res + assert 'href="/dataset/package_49"' in res res = self.app.get(url_for(controller='group', action='read', id='group_00', page=2)) assert 'href="/group/group_00?page=1' in res - - # TODO: enable this check when #1502 is fixed - # assert 'href="/dataset/package_50"' in res + assert 'href="/dataset/package_50"' in res def test_users_index(self): # allow for 2 extra users shown on user listing, 'logged_in' and 'visitor' diff --git a/ckan/tests/functional/test_user.py b/ckan/tests/functional/test_user.py index 9fbff19a6ec..c0a2ad19ba6 100644 --- a/ckan/tests/functional/test_user.py +++ b/ckan/tests/functional/test_user.py @@ -32,6 +32,8 @@ def setup_class(self): @classmethod def teardown_class(self): + # clear routes 'id' so that next test to run doesn't get it + self.app.get(url_for(controller='user', action='login', id=None)) SmtpServerHarness.teardown_class() model.repo.rebuild_db() From 0a2323316551620daaffcc66eaf292ffc10114f6 Mon Sep 17 00:00:00 2001 From: John Glover Date: Wed, 7 Dec 2011 14:10:16 +0000 Subject: [PATCH 21/78] [tasks, logic] Use a local session object in task_status_update so we can commit it at the end of the function Part of the bug fixes for #1483. Our Celery tasks will be expecting the logic layer function to work like this. 
--- ckan/logic/action/update.py | 9 ++++++--- ckan/model/__init__.py | 3 ++- ckan/model/meta.py | 29 +++++++++++++---------------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index 461f0810175..5bd99b8bc94 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -366,6 +366,9 @@ def user_update(context, data_dict): def task_status_update(context, data_dict): model = context['model'] + session = model.meta.create_local_session() + context['session'] = session + user = context['user'] id = data_dict.get("id") schema = context.get('schema') or default_task_status_schema() @@ -382,13 +385,13 @@ def task_status_update(context, data_dict): data, errors = validate(data_dict, schema, context) if errors: - model.Session.rollback() + session.rollback() raise ValidationError(errors, task_status_error_summary(errors)) task_status = task_status_dict_save(data, context) - if not context.get('defer_commit'): - model.Session.commit() + session.commit() + session.close() return task_status_dictize(task_status, context) def task_status_update_many(context, data_dict): diff --git a/ckan/model/__init__.py b/ckan/model/__init__.py index f4b952cf7d3..4ee2e3fea16 100644 --- a/ckan/model/__init__.py +++ b/ckan/model/__init__.py @@ -37,6 +37,7 @@ def init_model(engine): '''Call me before using any of the tables or classes in the model''' meta.Session.remove() meta.Session.configure(bind=engine) + meta.create_local_session.configure(bind=engine) meta.engine = engine meta.metadata.bind = engine # sqlalchemy migrate version table @@ -46,7 +47,7 @@ def init_model(engine): except sqlalchemy.exc.NoSuchTableError: pass - + class Repository(vdm.sqlalchemy.Repository): migrate_repository = ckan.migration.__path__[0] diff --git a/ckan/model/meta.py b/ckan/model/meta.py index 8e48e595a34..2872e8f1133 100644 --- a/ckan/model/meta.py +++ b/ckan/model/meta.py @@ -90,22 +90,19 @@ def after_rollback(self, 
session): # SQLAlchemy database engine. Updated by model.init_model() engine = None -if sqav.startswith("0.4"): - # SQLAlchemy session manager. Updated by model.init_model() - Session = scoped_session(sessionmaker( - autoflush=False, - transactional=True, - extension=[CkanSessionExtension(), - extension.PluginSessionExtension()], - )) -else: - Session = scoped_session(sessionmaker( - autoflush=False, - autocommit=False, - expire_on_commit=False, - extension=[CkanSessionExtension(), - extension.PluginSessionExtension()], - )) +Session = scoped_session(sessionmaker( + autoflush=False, + autocommit=False, + expire_on_commit=False, + extension=[CkanSessionExtension(), extension.PluginSessionExtension()], +)) + +create_local_session = sessionmaker( + autoflush=False, + autocommit=False, + expire_on_commit=False, + extension=[CkanSessionExtension(), extension.PluginSessionExtension()], +) #mapper = Session.mapper mapper = orm.mapper From a61959fbde828bbc8f6d7102de479bc4d6ee92f2 Mon Sep 17 00:00:00 2001 From: John Glover Date: Tue, 13 Dec 2011 11:21:29 +0000 Subject: [PATCH 22/78] [xs, 1546] Bug fix for Package.metadata_created --- ckan/model/package.py | 10 ++++------ ckan/tests/models/test_package.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/ckan/model/package.py b/ckan/model/package.py index 33b86de4a0b..f6a41315218 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -502,14 +502,12 @@ def metadata_modified(self): @property def metadata_created(self): import ckan.model as model - q = model.Session.query(model.Revision).select_from( - model.revision_table.join( - model.package_revision_table, - and_(model.revision_table.c.id==model.package_revision_table.c.revision_id)) - ).order_by(model.Revision.timestamp.asc()) + q = model.Session.query(model.PackageRevision)\ + .filter(model.PackageRevision.id == self.id)\ + .order_by(model.PackageRevision.revision_timestamp.asc()) ts = q.first() if ts is not None: 
- return ts.timestamp + return ts.revision_timestamp @staticmethod def get_fields(core_only=False, fields_to_ignore=None): diff --git a/ckan/tests/models/test_package.py b/ckan/tests/models/test_package.py index 5381153c95c..dbc682b2290 100644 --- a/ckan/tests/models/test_package.py +++ b/ckan/tests/models/test_package.py @@ -111,6 +111,34 @@ def test_as_dict(self): assert_equal(out['notes'], pkg.notes) assert_equal(out['notes_rendered'], '

    A great package [HTML_REMOVED] like package:pollution_stats\n

    ') + def test_metadata_created_and_modified(self): + # create a new package + name = "test_metadata" + rev = model.repo.new_revision() + package = model.Package(name=name) + model.Session.add(package) + model.Session.flush() + revision_id = model.Session().revision.id + created_timestamp = model.Session().revision.timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp,\ + (package.metadata_created, created_timestamp) + assert package.metadata_modified == created_timestamp,\ + (package.metadata_modified, created_timestamp) + + # update the package it + rev = model.repo.new_revision() + package = model.Package.by_name(name) + package.title = "test_metadata_new_title" + modified_timestamp = model.Session().revision.timestamp + model.repo.commit_and_remove() + + package = model.Package.by_name(name) + assert package.metadata_created == created_timestamp + assert package.metadata_modified == modified_timestamp + class TestPackageWithTags: """ From 9e86c8bb14afb2fde94ca7529c4c400d9bf0c491 Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 15 Dec 2011 13:18:28 +0000 Subject: [PATCH 23/78] [controller,templates] #1532 Add guidance for users trying to login with OpenID when they have not registered it with a CKAN user account yet. --- ckan/controllers/user.py | 3 ++- ckan/templates/user/login.html | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ckan/controllers/user.py b/ckan/controllers/user.py index 035d4b4aa6b..6a96c93ccfa 100644 --- a/ckan/controllers/user.py +++ b/ckan/controllers/user.py @@ -259,7 +259,8 @@ def logged_in(self): h.flash_success(_("%s is now logged in") % user_dict['display_name']) return self.me() else: - h.flash_error('Login failed. Bad username or password.') + h.flash_error('Login failed. Bad username or password.' 
+ \ + ' (Or if using OpenID, it hasn\'t been associated with an user account.)') h.redirect_to(controller='user', action='login') def logged_out(self): diff --git a/ckan/templates/user/login.html b/ckan/templates/user/login.html index a7ddd1efaa5..2b6fe7148e2 100644 --- a/ckan/templates/user/login.html +++ b/ckan/templates/user/login.html @@ -45,6 +45,7 @@
    Login using Open ID +

    NB: To set up your OpenID for this site, you first need to Register and then edit your Profile to provide your OpenID.

    Please click your account provider:

    From 2c595ae142bb9919000b3a1e8962f19f5a4c84d1 Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 15 Dec 2011 17:14:52 +0000 Subject: [PATCH 24/78] [model]: #1546/#191 Fix for SOLR indexing of new packages, broken in a619. --- ckan/model/package.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ckan/model/package.py b/ckan/model/package.py index f6a41315218..30ddec6ada1 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -481,8 +481,11 @@ def last_modified(*av): model.package_tag_table.c.revision_id == model.revision_table.c.id, *where) ] query = union(*[select([model.revision_table.c.timestamp], x) for x in where_clauses] - ).order_by("timestamp DESC").limit(1) - conn = model.meta.engine.connect() + ).order_by('timestamp DESC').limit(1) + # Use current connection because we might be in a 'before_commit' of + # a SessionExtension - only by using the current connection can we get + # at the newly created revision etc. objects. + conn = model.Session.connection() result = conn.execute(query).fetchone() if result: result_datetime = iso_date_to_datetime_for_sqlite(result[0]) From aeff7e5c476e46fee0b0d8fbbe754f8766681324 Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 15 Dec 2011 17:16:25 +0000 Subject: [PATCH 25/78] [release]: Changelog started. 
--- CHANGELOG.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 2c4d92aa7aa..c7fbdee3d1d 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,6 +1,19 @@ CKAN CHANGELOG ++++++++++++++ +v1.5.1 XXXX-XX-XX +================= + +Minor: + * Account creation disallowed with Open ID (create account in CKAN first) (#1386) + * Web UI improvements: + * Resource editing refreshed + * 'Task status' table added (#1363) - a step towards caching resource downloads (#1397) + + +Bug fixes: + * Dataset property metadata_created was wrong (since XXX) (#1546) + v1.5 2011-11-07 =============== Major: From bbaba5024f189a1548a1b01c3f4e8741df672e05 Mon Sep 17 00:00:00 2001 From: amercader Date: Mon, 28 Nov 2011 13:56:09 +0000 Subject: [PATCH 26/78] Cherry-picked branch 'feature-1490-standard-package-output-from-logic-layer' --- ckan/lib/dictization/model_dictize.py | 5 +++++ ckan/logic/action/get.py | 14 +++++++++----- ckan/model/package.py | 1 + ckan/tests/lib/test_dictization.py | 1 + 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 8adb48ac60d..675d47f1712 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -155,6 +155,11 @@ def package_dictize(pkg, context): q = select([rel_rev]).where(rel_rev.c.object_package_id == pkg.id) result = _execute_with_revision(q, rel_rev, context) result_dict["relationships_as_object"] = obj_list_dictize(result, context) + #isopen + # Get an actual Package object, not a PackageRevision + pkg_object = model.Package.get(pkg.id) + result_dict['isopen'] = pkg_object.isopen if isinstance(pkg_object.isopen,bool) else pkg_object.isopen() + return result_dict def group_dictize(group, context): diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 394fc722227..b5a3e1a504b 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -438,8 
+438,11 @@ def group_package_show(context, data_dict): if limit: query = query.limit(limit) - pack_rev = query.all() - return _package_list_with_resources(context, pack_rev) + result = [] + for pkg_rev in query.all(): + result.append(package_dictize(pkg_rev,context)) + + return result def tag_show(context, data_dict): '''Shows tag details''' @@ -457,9 +460,11 @@ def tag_show(context, data_dict): check_access('tag_show',context, data_dict) tag_dict = tag_dictize(tag,context) + extended_packages = [] for package in tag_dict['packages']: - extended_packages.append(_extend_package_dict(package,context)) + pkg = model.Package.get(package['id']) + extended_packages.append(package_dictize(pkg,context)) tag_dict['packages'] = extended_packages @@ -682,8 +687,7 @@ def package_search(context, data_dict): log.warning('package %s in index but not in database' % package) continue - result_dict = table_dictize(pkg, context) - result_dict = _extend_package_dict(result_dict,context) + result_dict = package_dictize(pkg,context) results.append(result_dict) return { diff --git a/ckan/model/package.py b/ckan/model/package.py index 30ddec6ada1..d854bca097d 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -189,6 +189,7 @@ def as_dict(self, ref_package_by='name', ref_group_by='name'): # Set 'license' in _dict to cater for old clients. # Todo: Remove from Version 2? 
_dict['license'] = self.license.title if self.license else _dict.get('license_id', '') + _dict['isopen'] = self.isopen() tags = [tag.name for tag in self.tags] tags.sort() # so it is determinable _dict['tags'] = tags diff --git a/ckan/tests/lib/test_dictization.py b/ckan/tests/lib/test_dictization.py index e63d716c6cf..9ddd5c6aa5e 100644 --- a/ckan/tests/lib/test_dictization.py +++ b/ckan/tests/lib/test_dictization.py @@ -43,6 +43,7 @@ def setup_class(cls): 'name': u'roger', 'state': u'active', 'title': u"Roger's books"}], + 'isopen': True, 'license_id': u'other-open', 'maintainer': None, 'maintainer_email': None, From 9e1e82ab452118f898545af7ce4f715b8c69c9e7 Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 16 Dec 2011 10:50:15 +0000 Subject: [PATCH 27/78] [doc][xs]: Mention the common pyc problem. --- doc/common-error-messages.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/common-error-messages.rst b/doc/common-error-messages.rst index bdbe7d6b295..a0e58313982 100644 --- a/doc/common-error-messages.rst +++ b/doc/common-error-messages.rst @@ -118,3 +118,10 @@ This suggests that the config file specified with the paster ``--config`` parame =========================================== This occurs when trying to ``import migrate.exceptions`` and is due to the version of sqlalchemy-migrate being used is too old - check the requires files for the version needed. + +``AssertionError: There is no script for 46 version`` +===================================================== + +This sort of message may be seen if you swap between different branches of CKAN. The .pyc file for database migration 46 still exists, but the corresponding .py file no longer exists after swapping to an earlier branch. The solution is to delete all pyc files (which is harmless):: + + find .
-name "*.pyc" |xargs rm \ No newline at end of file From 93a278a88aab8403761bd960f24f60012e93045d Mon Sep 17 00:00:00 2001 From: James Gardner Date: Fri, 16 Dec 2011 12:39:04 +0000 Subject: [PATCH 28/78] [packaging] Apache reloads instead of restarts, and there is a more accurate database existence check --- ckan_deb/usr/bin/ckan-create-instance | 4 ++-- ckan_deb/usr/lib/ckan/common.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ckan_deb/usr/bin/ckan-create-instance b/ckan_deb/usr/bin/ckan-create-instance index 2c42ae3d563..13f04730509 100755 --- a/ckan_deb/usr/bin/ckan-create-instance +++ b/ckan_deb/usr/bin/ckan-create-instance @@ -144,8 +144,8 @@ echo "Bringing the ${INSTANCE} INSTANCE out of maintenance mode ..." ckan_maintenance_off ${INSTANCE} # Restart Apache so it is aware of any changes -echo "Restarting apache ..." -/etc/init.d/apache2 restart +echo "Reloading apache ..." +/etc/init.d/apache2 reload # Install the new crontab echo "Enabling crontab for the ckan${INSTANCE} user ..." diff --git a/ckan_deb/usr/lib/ckan/common.sh b/ckan_deb/usr/lib/ckan/common.sh index e2a64194a2a..46a6059fe4d 100644 --- a/ckan_deb/usr/lib/ckan/common.sh +++ b/ckan_deb/usr/lib/ckan/common.sh @@ -151,7 +151,7 @@ ckan_ensure_db_exists () { exit 1 else INSTANCE=$1 - COMMAND_OUTPUT=`sudo -u postgres psql -l` + COMMAND_OUTPUT=`sudo -u postgres psql -c "select datname from pg_database where datname='$INSTANCE'"` if ! [[ "$COMMAND_OUTPUT" =~ ${INSTANCE} ]] ; then echo "Creating the database ..." sudo -u postgres createdb -O ${INSTANCE} ${INSTANCE} From 5aabc429feb44fbfd425c291f0587158311652f5 Mon Sep 17 00:00:00 2001 From: rgrp Date: Tue, 13 Dec 2011 07:40:33 +0000 Subject: [PATCH 29/78] [bugfix,upload][xs]: fixes #1518 by replacing ' ' with '-' in filenames of uploaded files. 
--- ckan/public/scripts/vendor/ckanjs/1.0.0/ckanjs.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ckan/public/scripts/vendor/ckanjs/1.0.0/ckanjs.js b/ckan/public/scripts/vendor/ckanjs/1.0.0/ckanjs.js index b3b440a30ce..8f415687b93 100755 --- a/ckan/public/scripts/vendor/ckanjs/1.0.0/ckanjs.js +++ b/ckan/public/scripts/vendor/ckanjs/1.0.0/ckanjs.js @@ -1661,12 +1661,15 @@ this.CKAN.View || (this.CKAN.View = {}); // (Could add userid/username and/or a small random string to reduce // collisions but chances seem very low already) makeUploadKey: function(fileName) { + // google storage replaces ' ' with '+' which breaks things + // See http://trac.ckan.org/ticket/1518 for more. + var corrected = fileName.replace(/ /g, '-'); // note that we put hh mm ss as hhmmss rather hh:mm:ss (former is 'basic // format') var now = new Date(); // replace ':' with nothing var str = this.ISODateString(now).replace(':', '').replace(':', ''); - return str + '/' + fileName; + return str + '/' + corrected; }, updateFormData: function(key) { From 9850afc7ed22930de6ca212a9a65fa44775333ef Mon Sep 17 00:00:00 2001 From: David Read Date: Fri, 16 Dec 2011 17:23:12 +0000 Subject: [PATCH 30/78] [cli]: #1552 Specify password as param when creating a user. Not the best code, but works. 
--- ckan/lib/cli.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index a0c518845b3..198e18aff79 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -411,14 +411,16 @@ class UserCmd(CkanCommand): user - lists users user list - lists users user - shows user properties - user add [] - add a user (prompts for password) + user add [apikey=] [password=] + - add a user (prompts for password if + not supplied) user setpass - set user password (prompts) user remove - removes user from users user search - searches for a user name ''' summary = __doc__.split('\n')[0] usage = __doc__ - max_args = 3 + max_args = 4 min_args = 0 def command(self): @@ -510,12 +512,38 @@ def add(self): print 'Need name of the user.' return username = self.args[1] - apikey = self.args[2] if len(self.args) > 2 else None - password = self.password_prompt() user = model.User.by_name(unicode(username)) if user: print 'User "%s" already found' % username sys.exit(1) + + # parse args + apikey = None + password = None + args = self.args[2:] + if len(args) == 1 and not (args[0].startswith('password') or \ + args[0].startswith('apikey')): + # continue to support the old syntax of just supplying + # the apikey + apikey = args[0] + else: + # new syntax: password=foo apikey=bar + for arg in args: + split = arg.find('=') + if split == -1: + split = arg.find(' ') + if split == -1: + raise ValueError('Could not parse arg: %r (expected "--