From 7fcd8349f364c95a1b6c1b11efd38b60ecdbf105 Mon Sep 17 00:00:00 2001 From: Toby Date: Tue, 20 Mar 2012 11:10:56 +0000 Subject: [PATCH 1/7] fix for connection loss on redis.flush --- ckan/config/middleware.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckan/config/middleware.py b/ckan/config/middleware.py index 8ed5ec0f47b..cf1415a7727 100644 --- a/ckan/config/middleware.py +++ b/ckan/config/middleware.py @@ -220,6 +220,8 @@ def _start_response(status, response_headers, exc_info=None): self.redis_connection = self.redis.StrictRedis() self.redis_connection.flushdb() except self.redis_exception: + # Connection may have failed at flush so clear it. + self.redis_connection = None return self.app(environ, start_response) # If cached return cached result From 41a665710653e96107b905c335a1ecfdcd95b092 Mon Sep 17 00:00:00 2001 From: Toby Date: Tue, 20 Mar 2012 12:31:04 +0000 Subject: [PATCH 2/7] only worry about ckan/auth_tkt cookies --- ckan/config/middleware.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ckan/config/middleware.py b/ckan/config/middleware.py index cf1415a7727..c07948a51a8 100644 --- a/ckan/config/middleware.py +++ b/ckan/config/middleware.py @@ -204,12 +204,18 @@ def _start_response(status, response_headers, exc_info=None): return start_response(status, response_headers, exc_info) # Only use cache for GET requests - # If there is a cookie we avoid the cache. # REMOTE_USER is used by some tests. - if environ['REQUEST_METHOD'] != 'GET' or environ.get('HTTP_COOKIE') or \ - environ.get('REMOTE_USER'): + if environ['REQUEST_METHOD'] != 'GET' or environ.get('REMOTE_USER'): return self.app(environ, start_response) + # If there is a ckan cookie (or auth_tkt) we avoid the cache. + # We want to allow other cookies like google analytics ones :( + cookie_string = environ.get('HTTP_COOKIE') + if cookie_string: + for cookie in cookie_string.split(';'): + if cookie.startswith('ckan') or cookie.startswith('auth_tkt'): + return self.app(environ, start_response) + # Make our cache key key = 'page:%s?%s' % (environ['PATH_INFO'], environ['QUERY_STRING']) From 7535aefb6285ad9147cdc1f791cb691fa2dcbcf5 Mon Sep 17 00:00:00 2001 From: Toby Date: Tue, 20 Mar 2012 13:00:18 +0000 Subject: [PATCH 3/7] remove auth_tkt if possible plus added comments to cookie removal code --- ckan/lib/base.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ckan/lib/base.py b/ckan/lib/base.py index ae984abb92e..cf1ab99b007 100644 --- a/ckan/lib/base.py +++ b/ckan/lib/base.py @@ -180,17 +180,24 @@ def __call__(self, environ, start_response): # the request is routed to. This routing information is # available in environ['pylons.routes_dict'] - # clean out any old cookies as they may contain api keys etc + # Clean out any old cookies as they may contain api keys etc + # This also improves the cachability of our pages as cookies + # prevent proxy servers from caching content unless they have + # been configured to ignore them. for cookie in request.cookies: - if cookie.startswith('ckan') and cookie not in ['ckan', 'ckan_killtopbar']: + if cookie.startswith('ckan') and cookie not in ['ckan']: response.delete_cookie(cookie) - - if cookie == 'ckan' and not c.user and not h.are_there_flash_messages(): + # Remove the ckan session cookie if not used e.g. logged out + elif cookie == 'ckan' and not c.user and not h.are_there_flash_messages(): if session.id: if not session.get('lang'): session.delete() else: response.delete_cookie(cookie) + # Remove auth_tkt repoze.who cookie if user not logged in. + elif cookie == 'auth_tkt' and not session.id: + response.delete_cookie(cookie) + try: return WSGIController.__call__(self, environ, start_response) finally: From 10cfd16841366fc7f6f4fc38d2b027868f0cb1ef Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 20 Mar 2012 18:25:28 +0000 Subject: [PATCH 4/7] [#1616][search] Improvements on the search index CLI Several fixes and new options for the `search-index` paster command: * Add -o option to only reindex datasets not already indexed * Add -i option to ignore exceptions when rebuilding * Add -r option to just refresh the index (not clearing it first) * Fix show command to show the index stored for a dataset * Add support for clearing the index of just one dataset --- ckan/lib/cli.py | 113 +++++++++++++++++++++++------------- ckan/lib/search/__init__.py | 80 ++++++++++++++++++++----- ckan/lib/search/index.py | 4 +- ckan/lib/search/query.py | 84 ++++++++++++++++++--------- ckan/tests/lib/test_cli.py | 68 +++++++++++++++++++++- doc/paster.rst | 54 ++++++++++++----- 6 files changed, 299 insertions(+), 104 deletions(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 3605b04b7bc..d6a40630bf2 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -8,12 +8,12 @@ from paste.script.util.logging_config import fileConfig import re -class MockTranslator(object): - def gettext(self, value): - return value +class MockTranslator(object): + def gettext(self, value): + return value - def ugettext(self, value): - return value + def ugettext(self, value): + return value def ungettext(self, singular, plural, n): if n > 1: @@ -54,13 +54,13 @@ def _load_config(self): self.registry.register(pylons.translator, self.translator_obj) def _setup_app(self): - cmd = paste.script.appinstall.SetupCommand('setup-app') - cmd.run([self.filename]) + cmd = paste.script.appinstall.SetupCommand('setup-app') + cmd.run([self.filename]) class ManageDb(CkanCommand): '''Perform various tasks on the database. - + db create # alias of db upgrade db init # create and put in default data db clean @@ -82,7 +82,7 @@ class ManageDb(CkanCommand): max_args = None min_args = 1 - def command(self): + def command(self): self._load_config() from ckan import model import ckan.lib.search as search @@ -170,7 +170,7 @@ def _postgres_load(self, filepath): self._run_cmd(pg_cmd) def _run_cmd(self, command_line): - import subprocess + import subprocess retcode = subprocess.call(command_line, shell=True) if retcode != 0: raise SystemError('Command exited with errorcode: %i' % retcode) @@ -196,7 +196,7 @@ def load(self, only_load=False): print 'Upgrading DB' from ckan import model model.repo.upgrade_db() - + print 'Rebuilding search index' import ckan.lib.search ckan.lib.search.rebuild() @@ -270,10 +270,10 @@ class SearchIndexCommand(CkanCommand): '''Creates a search index for all datasets Usage: - search-index rebuild [package-name] - reindex package-name if given, if not then rebuild full search index (all packages) - search-index check - checks for packages not indexed - search-index show {package-name} - shows index of a package - search-index clear - clears the search index for this ckan instance + search-index [-i] [-o] [-r] rebuild [dataset-name] - reindex dataset-name if given, if not then rebuild full search index (all datasets) + search-index check - checks for datasets not indexed + search-index show {dataset-name} - shows index of a dataset + search-index clear [dataset-name] - clears the search index for the provided dataset or for the whole ckan instance ''' summary = __doc__.split('\n')[0] @@ -281,36 +281,71 @@ class SearchIndexCommand(CkanCommand): max_args = 2 min_args = 0 + def __init__(self,name): + + super(SearchIndexCommand,self).__init__(name) + + self.parser.add_option('-i', '--force', dest='force', + action='store_true', default=False, help='Ignore exceptions when rebuilding the index') + + self.parser.add_option('-o', '--only-missing', dest='only_missing', + action='store_true', default=False, help='Index non indexed datasets only') + + self.parser.add_option('-r', '--refresh', dest='refresh', + action='store_true', default=False, help='Refresh current index (does not clear the existing one)') + def command(self): self._load_config() - from ckan.lib.search import rebuild, check, show, clear if not self.args: # default to printing help print self.usage return - cmd = self.args[0] + cmd = self.args[0] if cmd == 'rebuild': - if len(self.args) > 1: - rebuild(self.args[1]) - else: - rebuild() + self.rebuild() elif cmd == 'check': - check() + self.check() elif cmd == 'show': - if not len(self.args) == 2: - import pdb; pdb.set_trace() - self.args - show(self.args[1]) + self.show() elif cmd == 'clear': - clear() + self.clear() else: print 'Command %s not recognized' % cmd + def rebuild(self): + from ckan.lib.search import rebuild + + if len(self.args) > 1: + rebuild(self.args[1]) + else: + rebuild(only_missing=self.options.only_missing, + force=self.options.force, + refresh=self.options.refresh) + def check(self): + from ckan.lib.search import check + + check() + + def show(self): + from ckan.lib.search import show + + if not len(self.args) == 2: + print 'Missing parameter: dataset-name' + return + index = show(self.args[1]) + pprint(index) + + def clear(self): + from ckan.lib.search import clear + + package_id =self.args[1] if len(self.args) > 1 else None + clear(package_id) + class Notification(CkanCommand): '''Send out modification notifications. - + In "replay" mode, an update signal is sent for each dataset in the database. Usage: @@ -332,7 +367,7 @@ def command(self): cmd = 'replay' else: cmd = self.args[0] - + if cmd == 'replay': dome = DomainObjectModificationExtension() for package in Session.query(Package): @@ -466,12 +501,12 @@ def get_user_str(self, user): if user.name != user.display_name: user_str += ' display=%s' % user.display_name return user_str - + def list(self): from ckan import model print 'Users:' users = model.Session.query(model.User) - print 'count = %i' % users.count() + print 'count = %i' % users.count() for user in users: print self.get_user_str(user) @@ -484,7 +519,7 @@ def show(self): def setpass(self): from ckan import model - + if len(self.args) < 2: print 'Need name of the user.' return @@ -524,7 +559,7 @@ def password_prompt(cls): def add(self): from ckan import model - + if len(self.args) < 2: print 'Need name of the user.' return @@ -561,10 +596,10 @@ def add(self): if not password: password = self.password_prompt() - + print('Creating user: %r' % username) - + user_params = {'name': unicode(username), 'password': password} if apikey: @@ -641,7 +676,7 @@ def _get_dataset(self, dataset_ref): dataset = model.Package.get(unicode(dataset_ref)) assert dataset, 'Could not find dataset matching reference: %r' % dataset_ref return dataset - + def show(self, dataset_ref): from ckan import model import pprint @@ -670,7 +705,7 @@ def purge(self, dataset_ref): dataset.purge() model.repo.commit_and_remove() print '%s purged' % name - + class Celery(CkanCommand): '''Run celery daemon @@ -686,7 +721,7 @@ def command(self): os.environ['CKAN_CONFIG'] = os.path.abspath(self.options.config) from ckan.lib.celery_app import celery celery.worker_main(argv=['celeryd', '--loglevel=INFO']) - + class Ratings(CkanCommand): '''Manage the ratings stored in the db @@ -721,7 +756,7 @@ def count(self): q = model.Session.query(model.Rating) print "%i ratings" % q.count() q = q.filter(model.Rating.user_id == None) - print "of which %i are anonymous ratings" % q.count() + print "of which %i are anonymous ratings" % q.count() def clean(self, user_ratings=True): from ckan import model diff --git a/ckan/lib/search/__init__.py b/ckan/lib/search/__init__.py index 7137a18a1ba..874da65d059 100644 --- a/ckan/lib/search/__init__.py +++ b/ckan/lib/search/__init__.py @@ -13,6 +13,17 @@ log = logging.getLogger(__name__) +import sys +import cgitb +import warnings +def text_traceback(): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + res = 'the original traceback:'.join( + cgitb.text(sys.exc_info()).split('the original traceback:')[1:] + ).strip() + return res + SIMPLE_SEARCH = config.get('ckan.simple_search', False) SUPPORTED_SCHEMA_VERSIONS = ['1.3'] @@ -111,29 +122,61 @@ def notify(self, entity, operation): else: log.warn("Discarded Sync. indexing for: %s" % entity) -def rebuild(package=None): +def rebuild(package_id=None,only_missing=False,force=False,refresh=False): + ''' + Rebuilds the search index. + + If a dataset id is provided, only this dataset will be reindexed. + When reindexing all datasets, if only_missing is True, only the + datasets not already indexed will be processed. If force equals + True, if an execption is found, the exception will be logged, but + the process will carry on. + ''' from ckan import model log.debug("Rebuilding search index...") package_index = index_for(model.Package) - if package: + if package_id: pkg_dict = get_action('package_show_rest')( {'model': model, 'ignore_auth': True, 'api_version':1}, - {'id': package} + {'id': package_id} ) package_index.remove_dict(pkg_dict) package_index.insert_dict(pkg_dict) else: - # rebuild index - package_index.clear() - for pkg in model.Session.query(model.Package).filter(model.Package.state == 'active').all(): - package_index.insert_dict( - get_action('package_show_rest')( - {'model': model, 'ignore_auth': True, 'api_version':1}, - {'id': pkg.id} + package_ids = [r[0] for r in model.Session.query(model.Package.id).filter(model.Package.state == 'active').all()] + if only_missing: + log.debug('Indexing only missing packages...') + package_query = query_for(model.Package) + indexed_pkg_ids = set(package_query.get_all_entity_ids(max_results=len(package_ids))) + package_ids = set(package_ids) - indexed_pkg_ids # Packages not indexed + + if len(package_ids) == 0: + log.debug('All datasets are already indexed') + return + else: + log.debug('Rebuilding the whole index...') + # When refreshing, the index is not previously cleared + if not refresh: + package_index.clear() + + for pkg_id in package_ids: + try: + package_index.insert_dict( + get_action('package_show_rest')( + {'model': model, 'ignore_auth': True, 'api_version':1}, + {'id': pkg_id} + ) ) - ) + except Exception,e: + log.error('Error while indexing dataset %s: %s' % (pkg_id,str(e))) + if force: + log.error(text_traceback()) + continue + else: + raise + model.Session.commit() log.debug('Finished rebuilding search index.') @@ -153,14 +196,19 @@ def check(): def show(package_reference): from ckan import model - package_index = index_for(model.Package) - print package_index.get_index(package_reference) + package_query = query_for(model.Package) -def clear(): + return package_query.get_index(package_reference) + +def clear(package_reference=None): from ckan import model - log.debug("Clearing search index...") package_index = index_for(model.Package) - package_index.clear() + if package_reference: + log.debug("Clearing search index for dataset %s..." % package_reference) + package_index.delete_package({'id':package_reference}) + else: + log.debug("Clearing search index...") + package_index.clear() def check_solr_schema_version(schema_file=None): diff --git a/ckan/lib/search/index.py b/ckan/lib/search/index.py index b6c376c94d6..9c2d75bf91c 100644 --- a/ckan/lib/search/index.py +++ b/ckan/lib/search/index.py @@ -158,8 +158,8 @@ def index_package(self, pkg_dict): def delete_package(self, pkg_dict): conn = make_connection() - query = "+%s:%s +id:\"%s\" +site_id:\"%s\"" % (TYPE_FIELD, PACKAGE_TYPE, - pkg_dict.get('id'), + query = "+%s:%s (+id:\"%s\" OR +name:\"%s\") +site_id:\"%s\"" % (TYPE_FIELD, PACKAGE_TYPE, + pkg_dict.get('id'), pkg_dict.get('id'), config.get('ckan.site_id')) try: conn.delete_query(query) diff --git a/ckan/lib/search/query.py b/ckan/lib/search/query.py index e4d6dbb6152..a3b7adf35fe 100644 --- a/ckan/lib/search/query.py +++ b/ckan/lib/search/query.py @@ -16,7 +16,7 @@ 'extras' # Not used by Solr, but useful for extensions ]) -# for (solr) package searches, this specifies the fields that are searched +# for (solr) package searches, this specifies the fields that are searched # and their relative weighting QUERY_FIELDS = "name^4 title^4 tags^2 groups^2 text" @@ -65,30 +65,30 @@ def convert_legacy_parameters_to_solr(legacy_params): log.debug('Converted legacy search params from %r to %r', legacy_params, solr_params) return solr_params - + class QueryOptions(dict): """ - Options specify aspects of the search query which are only tangentially related + Options specify aspects of the search query which are only tangentially related to the query terms (such as limits, etc.). NB This is used only by legacy package search and current resource & tag search. Modern SOLR package search leaves this to SOLR syntax. """ - + BOOLEAN_OPTIONS = ['all_fields'] INTEGER_OPTIONS = ['offset', 'limit'] UNSUPPORTED_OPTIONS = ['filter_by_downloadable', 'filter_by_openness'] def __init__(self, **kwargs): from ckan.lib.search import DEFAULT_OPTIONS - + # set values according to the defaults for option_name, default_value in DEFAULT_OPTIONS.items(): if not option_name in self: self[option_name] = default_value - + super(QueryOptions, self).__init__(**kwargs) - + def validate(self): for key, value in self.items(): if key in self.BOOLEAN_OPTIONS: @@ -102,31 +102,31 @@ def validate(self): except ValueError: raise SearchQueryError('Value for search option %r must be an integer but received %r' % (key, value)) elif key in self.UNSUPPORTED_OPTIONS: - raise SearchQueryError('Search option %r is not supported' % key) - self[key] = value - + raise SearchQueryError('Search option %r is not supported' % key) + self[key] = value + def __getattr__(self, name): return self.get(name) - + def __setattr__(self, name, value): self[name] = value class SearchQuery(object): """ - A query is ... when you ask the search engine things. SearchQuery is intended + A query is ... when you ask the search engine things. SearchQuery is intended to be used for only one query, i.e. it sets state. Definitely not thread-safe. """ - + def __init__(self): self.results = [] self.count = 0 - + @property def open_licenses(self): # this isn't exactly the very best place to put these, but they stay - # there persistently. - # TODO: figure out if they change during run-time. + # there persistently. + # TODO: figure out if they change during run-time. global _open_licenses if not isinstance(_open_licenses, list): _open_licenses = [] @@ -134,16 +134,16 @@ def open_licenses(self): if license and license.isopen(): _open_licenses.append(license.id) return _open_licenses - + def get_all_entity_ids(self, max_results=1000): """ Return a list of the IDs of all indexed packages. """ return [] - + def run(self, query=None, terms=[], fields={}, facet_by=[], options=None, **kwargs): raise SearchError("SearchQuery.run() not implemented!") - + # convenience, allows to query(..) __call__ = run @@ -152,13 +152,13 @@ class TagSearchQuery(SearchQuery): """Search for tags.""" def run(self, query=[], fields={}, options=None, **kwargs): if options is None: - options = QueryOptions(**kwargs) + options = QueryOptions(**kwargs) else: options.update(kwargs) context = {'model': model, 'session': model.Session} data_dict = { - 'query': query, + 'query': query, 'fields': fields, 'offset': options.get('offset'), 'limit': options.get('limit') @@ -172,7 +172,7 @@ def run(self, query=[], fields={}, options=None, **kwargs): results['results'] = [r.as_dict() for r in results['results']] else: results['results'] = [r['name'] for r in results['results']] - + self.count = results['count'] self.results = results['results'] return results @@ -182,7 +182,7 @@ class ResourceSearchQuery(SearchQuery): """Search for resources.""" def run(self, fields={}, options=None, **kwargs): if options is None: - options = QueryOptions(**kwargs) + options = QueryOptions(**kwargs) else: options.update(kwargs) @@ -225,13 +225,41 @@ def get_all_entity_ids(self, max_results=1000): return [r.get('id') for r in data.results] + def get_index(self,reference): + query = { + 'rows': 1, + 'q': 'name:%s OR id:%s' % (reference,reference), + 'wt': 'json', + 'fq': 'site_id:"%s"' % config.get('ckan.site_id')} + + conn = make_connection() + log.debug('Package query: %r' % query) + try: + solr_response = conn.raw_query(**query) + except SolrException, e: + raise SearchError('SOLR returned an error running query: %r Error: %r' % + (query, e.reason)) + try: + data = json.loads(solr_response) + + if data['response']['numFound'] == 0: + raise SearchError('Dataset not found in the search index: %s' % reference) + else: + return data['response']['docs'][0] + except Exception, e: + log.exception(e) + raise SearchError(e) + finally: + conn.close() + + def run(self, query): ''' Performs a dataset search using the given query. @param query - dictionary with keys like: q, fq, sort, rows, facet @return - dictionary with keys results and count - + May raise SearchQueryError or SearchError. ''' from solr import SolrException @@ -258,7 +286,7 @@ def run(self, query): # order by score if no 'sort' term given order_by = query.get('sort') - if order_by == 'rank' or order_by is None: + if order_by == 'rank' or order_by is None: query['sort'] = 'score desc, name asc' # show only results from this CKAN instance @@ -266,7 +294,7 @@ def run(self, query): if not '+site_id:' in fq: fq += ' +site_id:"%s"' % config.get('ckan.site_id') - # filter for package status + # filter for package status if not '+state:' in fq: fq += " +state:active" query['fq'] = fq @@ -278,7 +306,7 @@ def run(self, query): # return the package ID and search scores query['fl'] = query.get('fl', 'name') - + # return results as json encoded string query['wt'] = query.get('wt', 'json') @@ -328,5 +356,5 @@ def run(self, query): raise SearchError(e) finally: conn.close() - + return {'results': self.results, 'count': self.count} diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py index 4adda216572..7bea16bb484 100644 --- a/ckan/tests/lib/test_cli.py +++ b/ckan/tests/lib/test_cli.py @@ -4,10 +4,12 @@ from nose.tools import assert_equal from ckan import model -from ckan.lib.cli import ManageDb +from ckan.lib.cli import ManageDb,SearchIndexCommand from ckan.lib.create_test_data import CreateTestData from ckan.lib.helpers import json +from ckan.lib.search import index_for,query_for + class TestDb: @classmethod def setup_class(cls): @@ -18,10 +20,10 @@ def setup_class(cls): rev = model.repo.new_revision() model.Package.by_name(u'warandpeace').delete() model.repo.commit_and_remove() - + @classmethod def teardown_class(cls): - model.repo.rebuild_db() + model.repo.rebuild_db() def test_simple_dump_csv(self): csv_filepath = '/tmp/dump.tmp' @@ -47,3 +49,63 @@ def test_simple_dump_json(self): pkg_names = set(row['name'] for row in rows) assert 'annakarenina' in pkg_names, pkg_names assert 'warandpeace' not in pkg_names, pkg_names + +class FakeOptions(): + def __init__(self,**kwargs): + for key in kwargs: + setattr(self,key,kwargs[key]) + +class TestSearch: + @classmethod + def setup_class(cls): + cls.search = SearchIndexCommand('search-index') + cls.index = index_for(model.Package) + cls.query = query_for(model.Package) + CreateTestData.create() + + @classmethod + def teardown_class(cls): + model.repo.rebuild_db() + + def test_clear_and_rebuild_index(self): + + # Clear index + self.search.args = () + self.search.options = FakeOptions() + self.search.clear() + + self.query.run({'q':'*:*'}) + + assert self.query.count == 0 + + # Rebuild index + self.search.args = () + self.search.options = FakeOptions(only_missing=False,force=False,refresh=False) + self.search.rebuild() + pkg_count = model.Session.query(model.Package).filter(model.Package.state==u'active').count() + + self.query.run({'q':'*:*'}) + + assert self.query.count == pkg_count + + def test_clear_and_rebuild_only_one(self): + + pkg_count = model.Session.query(model.Package).filter(model.Package.state==u'active').count() + + # Clear index for annakarenina + self.search.args = ('clear annakarenina').split() + self.search.options = FakeOptions() + self.search.clear() + + self.query.run({'q':'*:*'}) + + assert self.query.count == pkg_count - 1 + + # Rebuild index for annakarenina + self.search.args = ('rebuild annakarenina').split() + self.search.options = FakeOptions(only_missing=False,force=False,refresh=False) + self.search.rebuild() + + self.query.run({'q':'*:*'}) + + assert self.query.count == pkg_count diff --git a/doc/paster.rst b/doc/paster.rst index c664a74dc17..19d197bb971 100644 --- a/doc/paster.rst +++ b/doc/paster.rst @@ -2,11 +2,11 @@ Common CKAN Administrator Tasks =============================== -The majority of common CKAN administration tasks are carried out using the **paster** script. +The majority of common CKAN administration tasks are carried out using the **paster** script. Paster is run on the command line on the server running CKAN. This section covers: -* :ref:`paster-understanding`. Understanding paster syntax and getting help. +* :ref:`paster-understanding`. Understanding paster syntax and getting help. * :ref:`paster-tasks`. How to carry out common CKAN admin tasks using paster. .. _paster-understanding: @@ -62,7 +62,7 @@ The alternative, which also suits a CKAN 'package' install, is to simply give th Running Paster on a deployment ============================== -If CKAN is deployed with Apache on this machine, then you should run paster as the same user, which is usually ``www-data``. This is because paster will write to the same CKAN logfile as the Apache process and file permissions need to match. +If CKAN is deployed with Apache on this machine, then you should run paster as the same user, which is usually ``www-data``. This is because paster will write to the same CKAN logfile as the Apache process and file permissions need to match. For example:: @@ -94,18 +94,18 @@ It is essential to run the correct paster. The program may be installed globally . pyenv/bin/activate 2. Giving the path to paster when you run it:: - - pyenv/bin/paster ... + + pyenv/bin/paster ... Position of Paster Parameters ----------------------------- -The position of paster parameters matters. +The position of paster parameters matters. ``--plugin`` is a parameter to paster, so needs to come before the CKAN command. To do this, the first parameter to paster is normally ``--plugin=ckan``. -.. note:: The default value for ``--plugin`` is ``setup.py`` in the current directory. If you are running paster from the directory where CKAN's ``setup.py`` file is located, you don't need to specify the plugin parameter.. +.. note:: The default value for ``--plugin`` is ``setup.py`` in the current directory. If you are running paster from the directory where CKAN's ``setup.py`` file is located, you don't need to specify the plugin parameter.. Meanwhile, ``--config`` is a parameter to CKAN, so needs to come after the CKAN command. This specifies the CKAN config file for the instance you want to use, e.g. ``--config=/etc/ckan/std/std.ini`` @@ -120,7 +120,7 @@ The position of the CKAN command itself is less important, as longs as it follow Running a Paster Shell ---------------------- -If you want to run a "paster shell", which can be useful for development, then the plugin is pylons. e.g. ``paster --plugin=pylons shell``. +If you want to run a "paster shell", which can be useful for development, then the plugin is pylons. e.g. ``paster --plugin=pylons shell``. Often you will want to run this as the same user as the web application, to ensure log files are written as the same user. And you'll also want to specify a config file (note that this is not specified using the ``--config`` parameter, but simply as the final argument). For example:: @@ -147,20 +147,20 @@ The following tasks are supported by paster. For the full list of tasks supported by paster, you can run:: - + paster --plugin=ckan --help create-test-data: Create test data ---------------------------------- -As the name suggests, this command lets you load test data when first setting up CKAN. See :ref:`create-test-data` for details. +As the name suggests, this command lets you load test data when first setting up CKAN. See :ref:`create-test-data` for details. db: Manage databases -------------------- -Lets you initialise, upgrade, and dump the CKAN database. +Lets you initialise, upgrade, and dump the CKAN database. Initialisation ~~~~~~~~~~~~~~ @@ -216,7 +216,7 @@ For information on using ``db`` to create dumpfiles, see :doc:`database-dumps`. ratings: Manage dataset ratings ------------------------------- -Manages the ratings stored in the database, and can be used to count ratings, remove all ratings, or remove only anonymous ratings. +Manages the ratings stored in the database, and can be used to count ratings, remove all ratings, or remove only anonymous ratings. For example, to remove anonymous ratings from the database:: @@ -234,7 +234,7 @@ For example, to give the user named 'bar' the 'admin' role on the dataset 'foo': To list all the rights currently specified:: - paster --plugin=ckan rights list --config=/etc/ckan/std/std.ini + paster --plugin=ckan rights list --config=/etc/ckan/std/std.ini For more information and examples, see :doc:`authorization`. @@ -242,7 +242,7 @@ For more information and examples, see :doc:`authorization`. roles: Manage system-wide permissions -------------------------------------- -This important command gives you fine-grained control over CKAN permissions, by listing and modifying the assignment of actions to roles. +This important command gives you fine-grained control over CKAN permissions, by listing and modifying the assignment of actions to roles. The ``roles`` command has its own section: see :doc:`authorization`. @@ -254,13 +254,35 @@ Rebuilds the search index. This is useful to prevent search indexes from getting For example:: - paster --plugin=ckan search-index --config=/etc/ckan/std/std.ini + paster --plugin=ckan search-index rebuild --config=/etc/ckan/std/std.ini + +This default behaviour will clear the index and rebuild it with all datasets. If you want to rebuild it for only +one dataset, you can provide a dataset name:: + + paster --plugin=ckan search-index rebuild test-dataset-name --config=/etc/ckan/std/std.ini + +Alternatively, you can use the `-o` or `--only-missing` option to only reindex datasets which are not +already indexed:: + + paster --plugin=ckan search-index rebuild -o --config=/etc/ckan/std/std.ini + +If you don't want to rebuild the whole index, but just refresh it, use the `-r` or `--refresh` option. This +won't clear the index before starting rebuilding it:: + + paster --plugin=ckan search-index rebuild -r --config=/etc/ckan/std/std.ini + +There are other search related commands, mostly useful for debugging purposes:: + + search-index check - checks for datasets not indexed + search-index show {dataset-name} - shows index of a dataset + search-index clear [dataset-name] - clears the search index for the provided dataset or for the whole ckan instance + sysadmin: Give sysadmin rights ------------------------------ -Gives sysadmin rights to a named user. This means the user can perform any action on any object. +Gives sysadmin rights to a named user. This means the user can perform any action on any object. For example, to make a user called 'admin' into a sysadmin:: From 3b1fff9ff90c61cfdca2368c5e71d735e6a42ed2 Mon Sep 17 00:00:00 2001 From: Sean Hammond Date: Tue, 20 Mar 2012 21:17:34 +0000 Subject: [PATCH 5/7] Fix paster create -t ckanext ... This command was crashing, complaining that it couldn't find an egg-info dir. It looks like there needs to be an empty directory at: ckan/pastertemplates/template/ckanext_+project+.egg-info/ Since git doesn't track empty directories, I think this might have been lost in the move from hg to git? --- .../template/ckanext_+project+.egg-info/.gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 ckan/pastertemplates/template/ckanext_+project+.egg-info/.gitignore diff --git a/ckan/pastertemplates/template/ckanext_+project+.egg-info/.gitignore b/ckan/pastertemplates/template/ckanext_+project+.egg-info/.gitignore new file mode 100644 index 00000000000..e69de29bb2d From 163ea7bb8024c5706ddf1858c81185b0eb2e1c0e Mon Sep 17 00:00:00 2001 From: Rufus Pollock Date: Wed, 21 Mar 2012 08:41:34 +0000 Subject: [PATCH 6/7] [master,bugfix,routing][xs]: correct bug introduced in f82f3b9acd8a4dc82813a163511e87cf706681ac whereby conditions for datastore write were GET/POST/DELETE rather than PUT/POST/DELETE. --- ckan/config/routing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/config/routing.py b/ckan/config/routing.py index 36c5b628584..f36dc3b2027 100644 --- a/ckan/config/routing.py +++ b/ckan/config/routing.py @@ -25,7 +25,7 @@ def make_map(): DELETE = dict(method=['DELETE']) GET_POST = dict(method=['GET', 'POST']) PUT_POST = dict(method=['PUT','POST']) - GET_POST_DELETE = dict(method=['GET', 'POST', 'DELETE']) + PUT_POST_DELETE = dict(method=['PUT', 'POST', 'DELETE']) OPTIONS = dict(method=['OPTIONS']) from ckan.lib.plugins import register_package_plugins @@ -139,7 +139,7 @@ def make_map(): m.connect('datastore_read', '/api/data/{id}{url:(/.*)?}', action='read', url='', conditions=GET) m.connect('datastore_write', '/api/data/{id}{url:(/.*)?}', - action='write', url='', conditions=GET_POST_DELETE) + action='write', url='', conditions=PUT_POST_DELETE) map.redirect('/packages', '/dataset') From 257bf052a9a701109e5486430561b8a0666671b6 Mon Sep 17 00:00:00 2001 From: kindly Date: Wed, 21 Mar 2012 10:19:54 +0000 Subject: [PATCH 7/7] [2237] speed up tests by makeing metadata modified faster --- ckan/lib/dictization/__init__.py | 5 +++++ ckan/lib/dictization/model_dictize.py | 18 +++++------------- ckan/model/package.py | 6 +++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/ckan/lib/dictization/__init__.py b/ckan/lib/dictization/__init__.py index 42f748b07d6..08999974b7d 100644 --- a/ckan/lib/dictization/__init__.py +++ b/ckan/lib/dictization/__init__.py @@ -46,6 +46,11 @@ def table_dictize(obj, context, **kw): result_dict.update(kw) + ##HACK For optimisation to get metadata_modified created faster. + + context['metadata_modified'] = max(result_dict.get('revision_timestamp', ''), + context.get('metadata_modified', '')) + return result_dict diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 1c766bd44d8..a7b2247869b 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -40,9 +40,10 @@ def resource_list_dictize(res_list, context): active = context.get('active', True) result_list = [] for res in res_list: + resource_dict = resource_dictize(res, context) if active and res.state not in ('active', 'pending'): continue - result_list.append(resource_dictize(res, context)) + result_list.append(resource_dict) return sorted(result_list, key=lambda x: x["position"]) @@ -65,9 +66,9 @@ def extras_list_dictize(extras_list, context): result_list = [] active = context.get('active', True) for extra in extras_list: + dictized = d.table_dictize(extra, context) if active and extra.state not in ('active', 'pending'): continue - dictized = d.table_dictize(extra, context) value = dictized["value"] if not(context.get("extras_as_string") and isinstance(value, basestring)): dictized["value"] = h.json.dumps(value) @@ -205,11 +206,10 @@ def package_dictize(pkg, context): result_dict['license_title']= pkg.license_id # creation and modification date - result_dict['metadata_modified'] = pkg.metadata_modified.isoformat() \ - if pkg.metadata_modified else None + result_dict['metadata_modified'] = context.pop('metadata_modified') result_dict['metadata_created'] = pkg.metadata_created.isoformat() \ if pkg.metadata_created else None - + if context.get('for_view'): for item in plugins.PluginImplementations(plugins.IPackageController): result_dict = item.before_view(result_dict) @@ -366,14 +366,6 @@ def package_to_api(pkg, context): if site_url: dictized['ckan_url'] = '%s/dataset/%s' % (site_url, pkg.name) - metadata_modified = pkg.metadata_modified - dictized['metadata_modified'] = metadata_modified.isoformat() \ - if metadata_modified else None - - metadata_created = pkg.metadata_created - dictized['metadata_created'] = metadata_created.isoformat() \ - if metadata_created else None - for resource in dictized["resources"]: resource_dict_to_api(resource, pkg.id, context) diff --git a/ckan/model/package.py b/ckan/model/package.py index 4e4a62b97f3..26f8022a9aa 100644 --- a/ckan/model/package.py +++ b/ckan/model/package.py @@ -584,12 +584,12 @@ def get_groups(self, group_type=None, capacity=None): @property def metadata_created(self): import ckan.model as model - q = model.Session.query(model.PackageRevision)\ + q = model.Session.query(model.PackageRevision.revision_timestamp)\ .filter(model.PackageRevision.id == self.id)\ .order_by(model.PackageRevision.revision_timestamp.asc()) ts = q.first() - if ts is not None: - return ts.revision_timestamp + if ts: + return ts[0] @staticmethod def get_fields(core_only=False, fields_to_ignore=None):