From 731111ddff34c4931db06f7b127184dcbffeb305 Mon Sep 17 00:00:00 2001 From: David Read Date: Tue, 17 Jan 2012 13:55:09 +0000 Subject: [PATCH 1/5] [master][#1575]: Script used for correcting tags and groups during ca.ckan.net import to thedatahub.org. --- bin/canada.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++ bin/status.py | 26 +++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 bin/canada.py create mode 100644 bin/status.py diff --git a/bin/canada.py b/bin/canada.py new file mode 100644 index 00000000000..031102d8441 --- /dev/null +++ b/bin/canada.py @@ -0,0 +1,103 @@ +''' +Script to sort out the tags imported from ca.ckan.net to thedatahub.org and +got mangled in the process. +''' + +import re +from optparse import OptionParser +import copy + +import ckanclient +from status import Status + +def sort_out_tags(source_ckan_uri, + dest_ckan_uri, dest_api_key, + ): + ckan1 = ckanclient.CkanClient(base_location=source_ckan_uri) + ckan2 = ckanclient.CkanClient(base_location=dest_ckan_uri, + api_key=dest_api_key) + + # ensure group exists + group = 'country-ca' + assert group in set(ckan2.group_register_get()) + group_to_change = 'canadagov' + + # work out tag mappings + tag_status = Status('tag mapping') + tag_replace_map = {} + source_tags = ckan1.tag_register_get() + for tag in source_tags: + mangled_tag = re.sub('[-._]', '', tag) + replacement_tag = tag + # Change underscores to hyphens + replacement_tag = replacement_tag.replace('_', '-') + # Remove trailing punctuation + if replacement_tag[-1] in '_-.': + replacement_tag = replacement_tag[:-1] + if replacement_tag[0] in '_-.': + replacement_tag = replacement_tag[1:] + if mangled_tag == replacement_tag: + tag_status.record('Unchanged', mangled_tag, do_print=False) + continue + if mangled_tag in tag_replace_map and tag_replace_map[mangled_tag] != replacement_tag: + print 'Warning - can\'t differentiate %s : %s / %s' % \ + (mangled_tag, tag_replace_map[mangled_tag], replacement_tag) + 
tag_status.record('Mapping added', '%s:%s' % (mangled_tag, replacement_tag), do_print=False) + tag_replace_map[mangled_tag] = replacement_tag + example_map = tag_replace_map.items()[0] + print tag_status + + # Custom mappings + tag_replace_map['metaimportedfromcackannet'] = 'meta.imported-from-ca-ckan-net' + + # edit packages + pkg_status = Status('Packages') + pkgs = ckan2.group_entity_get(group)['packages'] + print 'Packages in the group: %i' % len(pkgs) + for pkg_name in pkgs: + pkg = ckan2.package_entity_get(pkg_name) + original_pkg = copy.deepcopy(pkg) + + # Change tags + edited_tags = [tag_replace_map.get(tag, tag) for tag in pkg['tags']] + if 'canada' in edited_tags: + edited_tags.remove('canada') + + if group_to_change in pkg['groups']: + pkg['groups'].remove(group_to_change) + edited_tags.append('canada-gov') + + if set(pkg['tags']) != set(edited_tags): + pkg['tags'] = edited_tags + print '%s: %r -> %r' % (pkg_name, sorted(original_pkg['tags']), sorted(edited_tags)) + + if pkg == original_pkg: + pkg_status.record('Unchanged', pkg_name) + continue + + try: + ckan2.package_entity_put(pkg) + except ckanclient.CkanApiError, e: + pkg_status.record('Error: %r' % e.args, pkg_name) + continue + + pkg_status.record('Successfully changed', pkg_name) + + print pkg_status + +usage = '''%prog [OPTIONS] +Recopy tags that got mangled in Canadian copy.''' +parser = OptionParser(usage=usage) +parser.add_option("-k", "--destination-ckan-api-key", dest="destination_ckan_api_key", + help="Destination CKAN's API key", metavar="API-KEY") + +(options, args) = parser.parse_args() + +assert len(args) == 2, 'The source and destination CKAN API URIs are the only two arguments. 
Found: %r' % args +source_ckan_uri, destination_ckan_uri = args +print 'Key: ', options.destination_ckan_api_key + +sort_out_tags(source_ckan_uri, + destination_ckan_uri, + options.destination_ckan_api_key, +) diff --git a/bin/status.py b/bin/status.py new file mode 100644 index 00000000000..575bd9c906e --- /dev/null +++ b/bin/status.py @@ -0,0 +1,26 @@ +from collections import defaultdict + +class Status: + '''When looping through objects and doing operations to them, + this is a useful object to keep track of what happens and + summarise the numbers at the end.''' + def __init__(self, obj_type_str=None): + self.obj_type_str = obj_type_str + self.pkg_status = defaultdict(list) # reason: [pkgs] + + def record(self, status_category, pkg_name, do_print=True): + self.pkg_status[status_category].append(pkg_name) + if do_print: + print '%s: %s' % (pkg_name, status_category) + + def __str__(self): + status = '\nStatus' + if self.obj_type_str: + status += ' of: %s' % self.obj_type_str + status += '\n' + status += '\n'.join([ \ + '%s: %i (e.g. %s)' % (category, len(pkg_names), sorted(pkg_names)[0]) \ + for (category, pkg_names) in self.pkg_status.items()]) + status += '\nTotal: %i\n' % sum([len(pkg_names) for pkg_names in self.pkg_status.values()]) + return status + From 814b468d0b138681cfc402f73b1d9fe6cc3e5775 Mon Sep 17 00:00:00 2001 From: amercader Date: Tue, 17 Jan 2012 15:53:56 +0000 Subject: [PATCH 2/5] [master][#1547] Abort search flag If extensions add an `abort_search` = True pair to the search_params dict returned by the `before_search` plugin hook, the actual query to Solr will be skipped and 0 results will be returned. Useful e.g. if the extension has performed a previous query that returned no records and the Solr one is no longer needed. 
--- ckan/logic/action/get.py | 56 ++++++++++++++---------- ckan/tests/functional/api/test_action.py | 23 ++++++++++ 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index e83c5672c30..ab2d4e2a48c 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -664,36 +664,46 @@ def package_search(context, data_dict): for item in PluginImplementations(IPackageController): data_dict = item.before_search(data_dict) - # return a list of package ids - data_dict['fl'] = 'id' - - query = query_for(model.Package) - query.run(data_dict) + # the extension may have decided that it's not necessary to perform the query + abort = data_dict.get('abort_search',False) results = [] - for package in query.results: - # get the package object - pkg_query = session.query(model.PackageRevision)\ - .filter(model.PackageRevision.id == package)\ - .filter(and_( - model.PackageRevision.state == u'active', - model.PackageRevision.current == True - )) - pkg = pkg_query.first() + if not abort: + # return a list of package ids + data_dict['fl'] = 'id' + + query = query_for(model.Package) + query.run(data_dict) + + for package in query.results: + # get the package object + pkg_query = session.query(model.PackageRevision)\ + .filter(model.PackageRevision.id == package)\ + .filter(and_( + model.PackageRevision.state == u'active', + model.PackageRevision.current == True + )) + pkg = pkg_query.first() - ## if the index has got a package that is not in ckan then - ## ignore it. 
+ if not pkg: + log.warning('package %s in index but not in database' % package) + continue - result_dict = package_dictize(pkg,context) - results.append(result_dict) + result_dict = package_dictize(pkg,context) + results.append(result_dict) + count = query.count + facets = query.facets + else: + count = 0 + facets = {} + results = [] search_results = { - 'count': query.count, - 'facets': query.facets, + 'count': count, + 'facets': facets, 'results': results } diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 9e2bb8e2e38..a80d52948a3 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -1196,6 +1196,12 @@ class MockPackageSearchPlugin(SingletonPlugin): def before_search(self, search_params): if 'extras' in search_params and 'ext_avoid' in search_params['extras']: assert 'q' in search_params + + if 'extras' in search_params and 'ext_abort' in search_params['extras']: + assert 'q' in search_params + # Prevent the actual query + search_params['abort_search'] = True + return search_params def after_search(self, search_results, search_params): @@ -1245,3 +1251,20 @@ def test_search_plugin_interface_search(self): assert results_dict['count'] == 1 plugins.unload(plugin) + + def test_search_plugin_interface_abort(self): + plugin = MockPackageSearchPlugin() + plugins.load(plugin) + + search_params = '%s=1' % json.dumps({ + 'q': '*:*', + 'extras' : {'ext_abort':True} + }) + + res = self.app.post('/api/action/package_search', params=search_params) + + # Check that the query was aborted and no results returned + res_dict = json.loads(res.body)['result'] + assert res_dict['count'] == 0 + assert len(res_dict['results']) == 0 + plugins.unload(plugin) From 93f24f33ccbf816e6b19b3d8eaaabdca627f9740 Mon Sep 17 00:00:00 2001 From: David Read Date: Tue, 17 Jan 2012 18:01:00 +0000 Subject: [PATCH 3/5] [master][noticket][s]: CLI command to check version of db. 
--- ckan/lib/cli.py | 7 +++++++ ckan/model/authz.py | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index b78fcd3c3e4..89ade452569 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -62,6 +62,7 @@ class ManageDb(CkanCommand): db init # create and put in default data db clean db upgrade [{version no.}] # Data migrate + db version # returns current version of data schema db dump {file-path} # dump to a pg_dump file db dump-rdf {dataset-name} {file-path} db simple-dump-csv {file-path} @@ -100,6 +101,8 @@ def command(self): model.repo.upgrade_db(self.args[1]) else: model.repo.upgrade_db() + elif cmd == 'version': + self.version() elif cmd == 'dump': self.dump() elif cmd == 'load': @@ -248,6 +251,10 @@ def send_rdf(self): talis = ckan.lib.talis.Talis() return talis.send_rdf(talis_store, username, password) + def version(self): + from ckan.model import Session + print Session.execute('select version from migrate_version;').fetchall() + class SearchIndexCommand(CkanCommand): '''Creates a search index for all datasets diff --git a/ckan/model/authz.py b/ckan/model/authz.py index fbc7ee4c2ec..85b474483ab 100644 --- a/ckan/model/authz.py +++ b/ckan/model/authz.py @@ -208,7 +208,7 @@ def add_authorization_group_to_role(cls, authorization_group, role, domain_obj): commit, will add the role to the database twice. Since some other functions count the number of occurrences, that leaves a fairly obvious bug. But adding a commit here seems to break various tests. - So don't call this twice without committing, I guess... + So don\'t call this twice without committing, I guess... 
''' if cls.authorization_group_has_role(authorization_group, role, domain_obj): return @@ -355,7 +355,7 @@ def give_all_packages_default_user_roles(): print 'Creating default user for for %s with admins %s' % (pkg.name, admins) setup_default_user_roles(pkg, admins) -# default user roles - used when the config doesn't specify them +# default user roles - used when the config doesn\'t specify them default_default_user_roles = { 'Package': {"visitor": ["editor"], "logged_in": ["editor"]}, 'Group': {"visitor": ["reader"], "logged_in": ["reader"]}, From 8314e9738d2c9084b5310593764820968b9f7c0b Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 18 Jan 2012 12:40:52 +0000 Subject: [PATCH 4/5] [master][#1627][s]: Favicon fixed. --- ckan/config/deployment.ini_tmpl | 2 +- ckan/lib/app_globals.py | 2 +- ckan/public/images/icons/ckan.ico | Bin 0 -> 1150 bytes ckan/templates/layout_base.html | 2 +- doc/configuration.rst | 13 ++++++++++++- 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 ckan/public/images/icons/ckan.ico diff --git a/ckan/config/deployment.ini_tmpl b/ckan/config/deployment.ini_tmpl index 571e99da917..adc875b7d19 100644 --- a/ckan/config/deployment.ini_tmpl +++ b/ckan/config/deployment.ini_tmpl @@ -109,7 +109,7 @@ ckan.site_description = ckan.site_url = ## Favicon (default is the CKAN software favicon) -ckan.favicon = http://assets.okfn.org/p/ckan/img/ckan.ico +ckan.favicon = /images/icons/ckan.ico ## Solr support #solr_url = http://127.0.0.1:8983/solr diff --git a/ckan/lib/app_globals.py b/ckan/lib/app_globals.py index ec8c9b341d6..221cb0e28ce 100644 --- a/ckan/lib/app_globals.py +++ b/ckan/lib/app_globals.py @@ -18,7 +18,7 @@ def __init__(self): """ self.site_title = config.get('ckan.site_title', '') self.favicon = config.get('ckan.favicon', - 'http://assets.okfn.org/p/ckan/img/ckan.ico') + '/images/icons/ckan.ico') self.site_logo = config.get('ckan.site_logo', '') self.site_url = config.get('ckan.site_url', '') self.site_url_nice = 
self.site_url.replace('http://','').replace('www.','') diff --git a/ckan/public/images/icons/ckan.ico b/ckan/public/images/icons/ckan.ico new file mode 100644 index 0000000000000000000000000000000000000000..0d9295c77aa301fcd6751a4a499873e80a063388 GIT binary patch literal 1150 zcmZQzU<5(|0R|wcz>vYhz#zuJz@P!dKp~(AL>x#lFaYJQ@j@_|40NH1M@YFxUH$VS}FI-Nt8OZvIdqzNLUBf2h$HRV>yO9F!Y1$O<0Se7iJE+ zJ5cn4^v18jsTZ3WaQz^?ajOaHMK=Sa-)`^mf0l8ph}MfRW9k3?{z(t?Jxl*Nc^>-b W?0tkJ?BsRmpM_Q0PgprXE)4)>797+7 literal 0 HcmV?d00001 diff --git a/ckan/templates/layout_base.html b/ckan/templates/layout_base.html index 2ca6b2f5135..ca23a0c9a94 100644 --- a/ckan/templates/layout_base.html +++ b/ckan/templates/layout_base.html @@ -17,7 +17,7 @@ - + diff --git a/doc/configuration.rst b/doc/configuration.rst index edeaa2feae0..fb29c220d42 100644 --- a/doc/configuration.rst +++ b/doc/configuration.rst @@ -57,7 +57,7 @@ site_logo Example:: - ckan.site_logo=/images/ckan_logo_fullname_long.png + ckan.site_logo = /images/ckan_logo_fullname_long.png Default value: (none) @@ -70,6 +70,17 @@ This sets the logo used in the title bar. .. index:: single: package_hide_extras +favicon +^^^^^^^ + +Example:: + + ckan.favicon = http://okfn.org/wp-content/themes/okfn-master-wordpress-theme/images/favicon.ico + +Default value: ``/images/icons/ckan.ico`` + +This sets the site's `favicon`. This icon is usually displayed by the browser in the tab heading and bookmark. + site_about ^^^^^^^^^^ From c2c342716684280509a400aa6e7df22ffb0683fe Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 18 Jan 2012 13:08:28 +0000 Subject: [PATCH 5/5] [master][#1623]: CSV/JSON dumps to excludes deleted objects. Also added first CLI test! 
--- ckan/lib/cli.py | 6 ++--- ckan/tests/lib/test_cli.py | 45 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 ckan/tests/lib/test_cli.py diff --git a/ckan/lib/cli.py b/ckan/lib/cli.py index 89ade452569..7c5b3b4054b 100644 --- a/ckan/lib/cli.py +++ b/ckan/lib/cli.py @@ -209,8 +209,7 @@ def simple_dump_csv(self): dump_filepath = self.args[1] import ckan.lib.dumper as dumper dump_file = open(dump_filepath, 'w') - query = model.Session.query(model.Package) - dumper.SimpleDumper().dump_csv(dump_file, query) + dumper.SimpleDumper().dump(dump_file, format='csv') def simple_dump_json(self): from ckan import model @@ -220,8 +219,7 @@ def simple_dump_json(self): dump_filepath = self.args[1] import ckan.lib.dumper as dumper dump_file = open(dump_filepath, 'w') - query = model.Session.query(model.Package) - dumper.SimpleDumper().dump_json(dump_file, query) + dumper.SimpleDumper().dump(dump_file, format='json') def dump_rdf(self): if len(self.args) < 3: diff --git a/ckan/tests/lib/test_cli.py b/ckan/tests/lib/test_cli.py new file mode 100644 index 00000000000..f6e383f0c78 --- /dev/null +++ b/ckan/tests/lib/test_cli.py @@ -0,0 +1,45 @@ +import os +import csv + +from nose.tools import assert_equal + +from ckan import model +from ckan.lib.cli import ManageDb +from ckan.lib.create_test_data import CreateTestData +from ckan.lib.helpers import json + +class TestDb: + @classmethod + def setup_class(cls): + cls.db = ManageDb('db') + CreateTestData.create() + + # delete warandpeace + rev = model.repo.new_revision() + model.Package.by_name(u'warandpeace').delete() + model.repo.commit_and_remove() + + def test_simple_dump_csv(self): + csv_filepath = '/tmp/dump.tmp' + self.db.args = ('simple-dump-csv %s' % csv_filepath).split() + self.db.simple_dump_csv() + assert os.path.exists(csv_filepath), csv_filepath + f_obj = open(csv_filepath, "r") + reader = csv.reader(f_obj) + rows = [row for row in reader] + assert_equal(rows[0][:3], 
['id', 'name', 'title']) + pkg_names = set(row[1] for row in rows[1:]) + assert 'annakarenina' in pkg_names, pkg_names + assert 'warandpeace' not in pkg_names, pkg_names + + def test_simple_dump_json(self): + json_filepath = '/tmp/dump.tmp' + self.db.args = ('simple-dump-json %s' % json_filepath).split() + self.db.simple_dump_json() + assert os.path.exists(json_filepath), json_filepath + f_obj = open(json_filepath, "r") + rows = json.loads(f_obj.read()) + assert set(rows[0].keys()) > set(('id', 'name', 'title')), rows[0].keys() + pkg_names = set(row['name'] for row in rows) + assert 'annakarenina' in pkg_names, pkg_names + assert 'warandpeace' not in pkg_names, pkg_names