From 4c267bf3e5a5185e0e5319c2c5b7f55c249149f8 Mon Sep 17 00:00:00 2001 From: alexandru-m-g Date: Mon, 19 Oct 2015 00:15:50 +0300 Subject: [PATCH 1/6] pacakge_search: improve speed on getting facet display names for orgs and groups --- ckan/logic/action/get.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 16ddb29a494..675a0c16f06 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1960,6 +1960,16 @@ def package_search(context, data_dict): 'sort': data_dict['sort'] } + # Group keys will contain all the names of the orgs and groups in the facets + group_keys = [] + for key, value in facets.items(): + if key in ('groups', 'organization'): + group_keys.extend(value.keys()) + + #Querying just for the columns we're interested in: name and title + groups = session.query(model.Group.name, model.Group.title).filter(model.Group.name.in_(group_keys)).all() + group_display_names = {g.name: g.title for g in groups} + # Transform facets into a more useful data structure. restructured_facets = {} for key, value in facets.items(): @@ -1971,11 +1981,9 @@ def package_search(context, data_dict): new_facet_dict = {} new_facet_dict['name'] = key_ if key in ('groups', 'organization'): - group = model.Group.get(key_) - if group: - new_facet_dict['display_name'] = group.display_name - else: - new_facet_dict['display_name'] = key_ + display_name = group_display_names.get(key_, key_) + display_name = display_name if display_name and display_name.strip() else key_ + new_facet_dict['display_name'] = display_name elif key == 'license_id': license = model.Package.get_license_register().get(key_) if license: From b6bf279d98a6e0eb3739d60b908c7b63ef15e5fc Mon Sep 17 00:00:00 2001 From: alexandru-m-g Date: Mon, 19 Oct 2015 00:17:05 +0300 Subject: [PATCH 2/6] pacakge_search: pkg variable no longer exists after trusting solr for getting the package data --- ckan/logic/action/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 675a0c16f06..32ef0f7afa9 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1944,7 +1944,7 @@ def package_search(context, data_dict): package_dict = item.before_view(package_dict) results.append(package_dict) else: - results.append(model_dictize.package_dictize(pkg, context)) + log.error('No package_dict is coming from solr for package with id {}'.format(package)) count = query.count facets = query.facets From 839e2309e93e35c762c0e1a1e2b5614be5f54049 Mon Sep 17 00:00:00 2001 From: alexandru-m-g Date: Mon, 19 Oct 2015 00:56:06 +0300 Subject: [PATCH 3/6] pacakge_search: changing dict comprehension syntax to be 2.6 compatible --- ckan/logic/action/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 32ef0f7afa9..dd9c578ad09 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1968,7 +1968,7 @@ def package_search(context, data_dict): #Querying just for the columns we're interested in: name and title groups = session.query(model.Group.name, model.Group.title).filter(model.Group.name.in_(group_keys)).all() - group_display_names = {g.name: g.title for g in groups} + group_display_names = dict((g.name, g.title) for g in groups) # Transform facets into a more useful data structure. restructured_facets = {} From 27789db25aa2b8f057502abe9b43031f45f41a18 Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 4 Nov 2015 17:08:59 +0000 Subject: [PATCH 4/6] [#2724] Reworked #2692 slightly. Facet test and others moved from legacy. --- ckan/logic/action/get.py | 19 +-- ckan/tests/legacy/logic/test_action.py | 167 ------------------------- ckan/tests/logic/action/test_get.py | 94 +++++++++++++- 3 files changed, 103 insertions(+), 177 deletions(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 07967ca3b9b..74ed45fe802 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1961,15 +1961,16 @@ def package_search(context, data_dict): 'sort': data_dict['sort'] } - # Group keys will contain all the names of the orgs and groups in the facets - group_keys = [] - for key, value in facets.items(): - if key in ('groups', 'organization'): - group_keys.extend(value.keys()) + # create a lookup table of group name to title for all the groups and + # organizations in the current search's facets. + group_names = [] + for field_name in ('groups', 'organization'): + group_names.extend(facets.get(field_name, {}).keys()) - #Querying just for the columns we're interested in: name and title - groups = session.query(model.Group.name, model.Group.title).filter(model.Group.name.in_(group_keys)).all() - group_display_names = dict((g.name, g.title) for g in groups) + groups = session.query(model.Group.name, model.Group.title) \ + .filter(model.Group.name.in_(group_names)) \ + .all() + group_titles_by_name = dict(groups) # Transform facets into a more useful data structure. restructured_facets = {} @@ -1982,7 +1983,7 @@ def package_search(context, data_dict): new_facet_dict = {} new_facet_dict['name'] = key_ if key in ('groups', 'organization'): - display_name = group_display_names.get(key_, key_) + display_name = group_titles_by_name.get(key_, key_) display_name = display_name if display_name and display_name.strip() else key_ new_facet_dict['display_name'] = display_name elif key == 'license_id': diff --git a/ckan/tests/legacy/logic/test_action.py b/ckan/tests/legacy/logic/test_action.py index 05bc73ba9e1..9190db68c79 100644 --- a/ckan/tests/legacy/logic/test_action.py +++ b/ckan/tests/legacy/logic/test_action.py @@ -1053,173 +1053,6 @@ def test_2_update_many(self): json.loads(res.body) - - -class TestActionPackageSearch(WsgiAppCase): - - @classmethod - def setup_class(cls): - setup_test_search_index() - CreateTestData.create() - cls.sysadmin_user = model.User.get('testsysadmin') - - @classmethod - def teardown_class(self): - model.repo.rebuild_db() - - def test_1_basic(self): - params = { - 'q':'tolstoy', - 'facet.field': ['groups', 'tags', 'res_format', 'license'], - 'rows': 20, - 'start': 0, - } - postparams = '%s=1' % json.dumps(params) - res = self.app.post('/api/action/package_search', params=postparams) - res = json.loads(res.body) - result = res['result'] - assert_equal(res['success'], True) - assert_equal(result['count'], 1) - assert_equal(result['results'][0]['name'], 'annakarenina') - - # Test GET request - params_json_list = params - params_json_list['facet.field'] = json.dumps(params['facet.field']) - url_params = urllib.urlencode(params_json_list) - res = self.app.get('/api/action/package_search?{0}'.format(url_params)) - res = json.loads(res.body) - result = res['result'] - assert_equal(res['success'], True) - assert_equal(result['count'], 1) - assert_equal(result['results'][0]['name'], 'annakarenina') - - def test_1_facet_limit(self): - params = { - 'q':'*:*', - 'facet.field': ['groups', 'tags', 'res_format', 'license'], - 'rows': 20, - 'start': 0, - } - postparams = '%s=1' % json.dumps(params) - res = self.app.post('/api/action/package_search', params=postparams) - res = json.loads(res.body) - assert_equal(res['success'], True) - - assert_equal(len(res['result']['search_facets']['groups']['items']), 2) - - params = { - 'q':'*:*', - 'facet.field': ['groups', 'tags', 'res_format', 'license'], - 'facet.limit': 1, - 'rows': 20, - 'start': 0, - } - postparams = '%s=1' % json.dumps(params) - res = self.app.post('/api/action/package_search', params=postparams) - res = json.loads(res.body) - assert_equal(res['success'], True) - - assert_equal(len(res['result']['search_facets']['groups']['items']), 1) - - params = { - 'q':'*:*', - 'facet.field': ['groups', 'tags', 'res_format', 'license'], - 'facet.limit': -1, # No limit - 'rows': 20, - 'start': 0, - } - postparams = '%s=1' % json.dumps(params) - res = self.app.post('/api/action/package_search', params=postparams) - res = json.loads(res.body) - assert_equal(res['success'], True) - - assert_equal(len(res['result']['search_facets']['groups']['items']), 2) - - def test_1_basic_no_params(self): - postparams = '%s=1' % json.dumps({}) - res = self.app.post('/api/action/package_search', params=postparams) - res = json.loads(res.body) - result = res['result'] - assert_equal(res['success'], True) - assert_equal(result['count'], 2) - assert result['results'][0]['name'] in ('annakarenina', 'warandpeace') - - # Test GET request - res = self.app.get('/api/action/package_search') - res = json.loads(res.body) - result = res['result'] - assert_equal(res['success'], True) - assert_equal(result['count'], 2) - assert result['results'][0]['name'] in ('annakarenina', 'warandpeace') - - def test_2_bad_param(self): - postparams = '%s=1' % json.dumps({ - 'sort':'metadata_modified', - }) - res = self.app.post('/api/action/package_search', params=postparams, - status=409) - assert '"message": "Search error:' in res.body, res.body - assert 'SOLR returned an error' in res.body, res.body - # solr error is 'Missing sort order' or 'Missing_sort_order', - # depending on the solr version. - assert 'sort' in res.body, res.body - - def test_3_bad_param(self): - postparams = '%s=1' % json.dumps({ - 'weird_param':True, - }) - res = self.app.post('/api/action/package_search', params=postparams, - status=400) - assert '"message": "Search Query is invalid:' in res.body, res.body - assert '"Invalid search parameters: [\'weird_param\']' in res.body, res.body - - def test_4_sort_by_metadata_modified(self): - search_params = '%s=1' % json.dumps({ - 'q': '*:*', - 'fl': 'name, metadata_modified', - 'sort': u'metadata_modified desc' - }) - - # modify warandpeace, check that it is the first search result - rev = model.repo.new_revision() - pkg = model.Package.get('warandpeace') - pkg.title = "War and Peace [UPDATED]" - - pkg.metadata_modified = datetime.datetime.utcnow() - model.repo.commit_and_remove() - - res = self.app.post('/api/action/package_search', params=search_params) - result = json.loads(res.body)['result'] - result_names = [r['name'] for r in result['results']] - assert result_names == ['warandpeace', 'annakarenina'], result_names - - # modify annakarenina, check that it is the first search result - rev = model.repo.new_revision() - pkg = model.Package.get('annakarenina') - pkg.title = "A Novel By Tolstoy [UPDATED]" - pkg.metadata_modified = datetime.datetime.utcnow() - model.repo.commit_and_remove() - - res = self.app.post('/api/action/package_search', params=search_params) - result = json.loads(res.body)['result'] - result_names = [r['name'] for r in result['results']] - assert result_names == ['annakarenina', 'warandpeace'], result_names - - # add a tag to warandpeace, check that it is the first result - pkg = model.Package.get('warandpeace') - pkg_params = '%s=1' % json.dumps({'id': pkg.id}) - res = self.app.post('/api/action/package_show', params=pkg_params) - pkg_dict = json.loads(res.body)['result'] - pkg_dict['tags'].append({'name': 'new-tag'}) - pkg_params = '%s=1' % json.dumps(pkg_dict) - res = self.app.post('/api/action/package_update', params=pkg_params, - extra_environ={'Authorization': str(self.sysadmin_user.apikey)}) - - res = self.app.post('/api/action/package_search', params=search_params) - result = json.loads(res.body)['result'] - result_names = [r['name'] for r in result['results']] - assert result_names == ['warandpeace', 'annakarenina'], result_names - class MockPackageSearchPlugin(SingletonPlugin): implements(IPackageController, inherit=True) diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 9d646e215f2..83138302e77 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -5,6 +5,7 @@ import ckan.tests.helpers as helpers import ckan.tests.factories as factories import ckan.logic.schema as schema +from ckan.lib.search.common import SearchError eq = nose.tools.eq_ @@ -843,12 +844,103 @@ def test_package_autocomplete_does_not_return_private_datasets(self): class TestPackageSearch(helpers.FunctionalTestBase): + def test_search(self): + factories.Dataset(title='Rivers') + factories.Dataset(title='Lakes') # decoy + + search_result = helpers.call_action('package_search', q='rivers') + + eq(search_result['results'][0]['title'], 'Rivers') + eq(search_result['count'], 1) + + def test_search_all(self): + factories.Dataset(title='Rivers') + factories.Dataset(title='Lakes') + + search_result = helpers.call_action('package_search') # no q + + eq(search_result['count'], 2) + + def test_bad_action_parameter(self): + nose.tools.assert_raises( + SearchError, + helpers.call_action, + 'package_search', weird_param=1) + + def test_bad_solr_parameter(self): + nose.tools.assert_raises( + SearchError, + helpers.call_action, + 'package_search', sort='metadata_modified') + # SOLR doesn't like that we didn't specify 'asc' or 'desc' + # SOLR error is 'Missing sort order' or 'Missing_sort_order', + # depending on the solr version. + + def test_facets(self): + org = factories.Organization() + factories.Dataset(owner_org=org['id']) + factories.Dataset(owner_org=org['id']) + + data_dict = {'facet.field': ['organization']} + search_result = helpers.call_action('package_search', **data_dict) + + eq(search_result['count'], 2) + eq(search_result['search_facets'], + {'organization': {'items': [{'count': 2, + 'display_name': u'Test Organization', + 'name': 'test_org_0'}], + 'title': 'organization'}}) + + def test_facet_limit(self): + group1 = factories.Group() + group2 = factories.Group() + factories.Dataset(groups=[{'name': group1['name']}, + {'name': group2['name']}]) + factories.Dataset(groups=[{'name': group1['name']}]) + factories.Dataset() + + data_dict = {'facet.field': ['groups'], + 'facet.limit': 1} + search_result = helpers.call_action('package_search', **data_dict) + + eq(len(search_result['search_facets']['groups']['items']), 1) + eq(search_result['search_facets'], + {'groups': {'items': [{'count': 2, + 'display_name': u'Test Group 0', + 'name': 'test_group_0'}], + 'title': 'groups'}}) + + def test_facet_no_limit(self): + group1 = factories.Group() + group2 = factories.Group() + factories.Dataset(groups=[{'name': group1['name']}, + {'name': group2['name']}]) + factories.Dataset(groups=[{'name': group1['name']}]) + factories.Dataset() + + data_dict = {'facet.field': ['groups'], + 'facet.limit': -1} # no limit + search_result = helpers.call_action('package_search', **data_dict) + + eq(len(search_result['search_facets']['groups']['items']), 2) + + def test_sort(self): + factories.Dataset(name='test0') + factories.Dataset(name='test1') + factories.Dataset(name='test2') + + search_result = helpers.call_action('package_search', + sort='metadata_created desc') + + result_names = [result['name'] for result in search_result['results']] + eq(result_names, [u'test2', u'test1', u'test0']) + def test_package_search_on_resource_name(self): ''' package_search() should allow searching on resource name field. ''' resource_name = 'resource_abc' - package = factories.Resource(name=resource_name) + factories.Resource(name=resource_name) search_result = helpers.call_action('package_search', q='resource_abc') eq(search_result['results'][0]['resources'][0]['name'], resource_name) From 34e1e3f05f52b9ec5b2c2ff79af9822e6d01dd0a Mon Sep 17 00:00:00 2001 From: David Read Date: Wed, 4 Nov 2015 17:12:01 +0000 Subject: [PATCH 5/6] [#2724] Improve log. --- ckan/logic/action/get.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 74ed45fe802..d8eaeb2c111 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -1945,7 +1945,8 @@ def package_search(context, data_dict): package_dict = item.before_view(package_dict) results.append(package_dict) else: - log.error('No package_dict is coming from solr for package with id {}'.format(package)) + log.error('No package_dict is coming from solr for package ' + 'id %s', package['id']) count = query.count facets = query.facets From 7a0ebbd8b783c183c6231c9f96d906c9e9263acc Mon Sep 17 00:00:00 2001 From: David Read Date: Thu, 5 Nov 2015 09:37:32 +0000 Subject: [PATCH 6/6] [#2724] Fix tests. --- ckan/tests/logic/action/test_get.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 83138302e77..066f49f0d22 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -877,7 +877,7 @@ def test_bad_solr_parameter(self): # depending on the solr version. def test_facets(self): - org = factories.Organization() + org = factories.Organization(name='test-org-facet', title='Test Org') factories.Dataset(owner_org=org['id']) factories.Dataset(owner_org=org['id']) @@ -887,13 +887,13 @@ def test_facets(self): eq(search_result['count'], 2) eq(search_result['search_facets'], {'organization': {'items': [{'count': 2, - 'display_name': u'Test Organization', - 'name': 'test_org_0'}], + 'display_name': u'Test Org', + 'name': 'test-org-facet'}], 'title': 'organization'}}) def test_facet_limit(self): - group1 = factories.Group() - group2 = factories.Group() + group1 = factories.Group(name='test-group-fl1', title='Test Group 1') + group2 = factories.Group(name='test-group-fl2', title='Test Group 2') factories.Dataset(groups=[{'name': group1['name']}, {'name': group2['name']}]) factories.Dataset(groups=[{'name': group1['name']}]) @@ -906,8 +906,8 @@ def test_facet_limit(self): eq(len(search_result['search_facets']['groups']['items']), 1) eq(search_result['search_facets'], {'groups': {'items': [{'count': 2, - 'display_name': u'Test Group 0', - 'name': 'test_group_0'}], + 'display_name': u'Test Group 1', + 'name': 'test-group-fl1'}], 'title': 'groups'}}) def test_facet_no_limit(self):