Skip to content

Commit

Permalink
Merge pull request #2725 from ckan/alexandru-m-g-package-search-facet…
Browse files Browse the repository at this point in the history
…s-speed

Package search facets speed
  • Loading branch information
wardi committed Nov 5, 2015
2 parents bdbb5a8 + 7a0ebbd commit b4592ab
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 174 deletions.
22 changes: 16 additions & 6 deletions ckan/logic/action/get.py
Expand Up @@ -1945,7 +1945,8 @@ def package_search(context, data_dict):
package_dict = item.before_view(package_dict)
results.append(package_dict)
else:
results.append(model_dictize.package_dictize(pkg, context))
log.error('No package_dict is coming from solr for package '
'id %s', package['id'])

count = query.count
facets = query.facets
Expand All @@ -1961,6 +1962,17 @@ def package_search(context, data_dict):
'sort': data_dict['sort']
}

# create a lookup table of group name to title for all the groups and
# organizations in the current search's facets.
group_names = []
for field_name in ('groups', 'organization'):
group_names.extend(facets.get(field_name, {}).keys())

groups = session.query(model.Group.name, model.Group.title) \
.filter(model.Group.name.in_(group_names)) \
.all()
group_titles_by_name = dict(groups)

# Transform facets into a more useful data structure.
restructured_facets = {}
for key, value in facets.items():
Expand All @@ -1972,11 +1984,9 @@ def package_search(context, data_dict):
new_facet_dict = {}
new_facet_dict['name'] = key_
if key in ('groups', 'organization'):
group = model.Group.get(key_)
if group:
new_facet_dict['display_name'] = group.display_name
else:
new_facet_dict['display_name'] = key_
display_name = group_titles_by_name.get(key_, key_)
display_name = display_name if display_name and display_name.strip() else key_
new_facet_dict['display_name'] = display_name
elif key == 'license_id':
license = model.Package.get_license_register().get(key_)
if license:
Expand Down
167 changes: 0 additions & 167 deletions ckan/tests/legacy/logic/test_action.py
Expand Up @@ -1053,173 +1053,6 @@ def test_2_update_many(self):
json.loads(res.body)




class TestActionPackageSearch(WsgiAppCase):
    '''Functional tests for the ``package_search`` action over the HTTP API.

    Requests are sent through ``self.app`` to
    ``/api/action/package_search``. The assertions expect the
    'annakarenina' and 'warandpeace' packages to be searchable after
    ``CreateTestData.create()`` has run in ``setup_class``.
    '''

    @classmethod
    def setup_class(cls):
        '''Prepare the search index, the test data and the sysadmin user.'''
        setup_test_search_index()
        CreateTestData.create()
        cls.sysadmin_user = model.User.get('testsysadmin')

    @classmethod
    def teardown_class(cls):
        '''Rebuild the database once the tests in this class have run.'''
        model.repo.rebuild_db()

    def _post_search(self, params, **post_kwargs):
        '''POST *params* to package_search and return the raw response.

        :param params: dict of search parameters; it is JSON-encoded and
            sent in the ``<json>=1`` form used throughout these tests.
        :param post_kwargs: extra keyword arguments forwarded to
            ``self.app.post`` (e.g. ``status=409``).
        '''
        postparams = '%s=1' % json.dumps(params)
        return self.app.post('/api/action/package_search',
                             params=postparams, **post_kwargs)

    def test_1_basic(self):
        params = {
            'q': 'tolstoy',
            'facet.field': ['groups', 'tags', 'res_format', 'license'],
            'rows': 20,
            'start': 0,
        }
        res = json.loads(self._post_search(params).body)
        result = res['result']
        assert_equal(res['success'], True)
        assert_equal(result['count'], 1)
        assert_equal(result['results'][0]['name'], 'annakarenina')

        # Test GET request. Work on a copy so that the POST parameters
        # above are not silently rewritten through an alias.
        get_params = dict(params)
        get_params['facet.field'] = json.dumps(params['facet.field'])
        url_params = urllib.urlencode(get_params)
        res = self.app.get('/api/action/package_search?{0}'.format(url_params))
        res = json.loads(res.body)
        result = res['result']
        assert_equal(res['success'], True)
        assert_equal(result['count'], 1)
        assert_equal(result['results'][0]['name'], 'annakarenina')

    def test_1_facet_limit(self):
        base_params = {
            'q': '*:*',
            'facet.field': ['groups', 'tags', 'res_format', 'license'],
            'rows': 20,
            'start': 0,
        }
        # (extra parameters, expected number of 'groups' facet items)
        cases = (
            ({}, 2),
            ({'facet.limit': 1}, 1),
            ({'facet.limit': -1}, 2),  # No limit
        )
        for extra_params, expected_items in cases:
            params = dict(base_params)
            params.update(extra_params)
            res = json.loads(self._post_search(params).body)
            assert_equal(res['success'], True)

            group_items = res['result']['search_facets']['groups']['items']
            assert_equal(len(group_items), expected_items)

    def test_1_basic_no_params(self):
        res = json.loads(self._post_search({}).body)
        result = res['result']
        assert_equal(res['success'], True)
        assert_equal(result['count'], 2)
        assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')

        # Test GET request
        res = self.app.get('/api/action/package_search')
        res = json.loads(res.body)
        result = res['result']
        assert_equal(res['success'], True)
        assert_equal(result['count'], 2)
        assert result['results'][0]['name'] in ('annakarenina', 'warandpeace')

    def test_2_bad_param(self):
        res = self._post_search({'sort': 'metadata_modified'}, status=409)
        assert '"message": "Search error:' in res.body, res.body
        assert 'SOLR returned an error' in res.body, res.body
        # solr error is 'Missing sort order' or 'Missing_sort_order',
        # depending on the solr version.
        assert 'sort' in res.body, res.body

    def test_3_bad_param(self):
        res = self._post_search({'weird_param': True}, status=400)
        assert '"message": "Search Query is invalid:' in res.body, res.body
        assert '"Invalid search parameters: [\'weird_param\']' in res.body, res.body

    def test_4_sort_by_metadata_modified(self):
        search_params = {
            'q': '*:*',
            'fl': 'name, metadata_modified',
            'sort': u'metadata_modified desc'
        }

        def search_result_names():
            result = json.loads(self._post_search(search_params).body)['result']
            return [r['name'] for r in result['results']]

        # modify warandpeace, check that it is the first search result
        # (new_revision() is called for its side effect only)
        model.repo.new_revision()
        pkg = model.Package.get('warandpeace')
        pkg.title = "War and Peace [UPDATED]"

        pkg.metadata_modified = datetime.datetime.utcnow()
        model.repo.commit_and_remove()

        result_names = search_result_names()
        assert result_names == ['warandpeace', 'annakarenina'], result_names

        # modify annakarenina, check that it is the first search result
        model.repo.new_revision()
        pkg = model.Package.get('annakarenina')
        pkg.title = "A Novel By Tolstoy [UPDATED]"
        pkg.metadata_modified = datetime.datetime.utcnow()
        model.repo.commit_and_remove()

        result_names = search_result_names()
        assert result_names == ['annakarenina', 'warandpeace'], result_names

        # add a tag to warandpeace, check that it is the first result
        pkg = model.Package.get('warandpeace')
        pkg_params = '%s=1' % json.dumps({'id': pkg.id})
        res = self.app.post('/api/action/package_show', params=pkg_params)
        pkg_dict = json.loads(res.body)['result']
        pkg_dict['tags'].append({'name': 'new-tag'})
        pkg_params = '%s=1' % json.dumps(pkg_dict)
        res = self.app.post('/api/action/package_update', params=pkg_params,
                            extra_environ={'Authorization': str(self.sysadmin_user.apikey)})

        result_names = search_result_names()
        assert result_names == ['warandpeace', 'annakarenina'], result_names

class MockPackageSearchPlugin(SingletonPlugin):
implements(IPackageController, inherit=True)

Expand Down
94 changes: 93 additions & 1 deletion ckan/tests/logic/action/test_get.py
Expand Up @@ -5,6 +5,7 @@
import ckan.tests.helpers as helpers
import ckan.tests.factories as factories
import ckan.logic.schema as schema
from ckan.lib.search.common import SearchError


eq = nose.tools.eq_
Expand Down Expand Up @@ -843,12 +844,103 @@ def test_package_autocomplete_does_not_return_private_datasets(self):

class TestPackageSearch(helpers.FunctionalTestBase):

def test_search(self):
    '''A keyword query returns only the dataset whose title matches.'''
    for title in ('Rivers', 'Lakes'):  # 'Lakes' is the decoy
        factories.Dataset(title=title)

    found = helpers.call_action('package_search', q='rivers')

    top_hit = found['results'][0]
    eq(top_hit['title'], 'Rivers')
    eq(found['count'], 1)

def test_search_all(self):
    '''Calling package_search without ``q`` counts every dataset.'''
    titles = ('Rivers', 'Lakes')
    for title in titles:
        factories.Dataset(title=title)

    # no q
    everything = helpers.call_action('package_search')

    eq(everything['count'], 2)

def test_bad_action_parameter(self):
    '''A parameter unknown to package_search raises SearchError.'''
    unknown_params = {'weird_param': 1}
    nose.tools.assert_raises(
        SearchError,
        helpers.call_action, 'package_search', **unknown_params)

def test_bad_solr_parameter(self):
    '''A sort field given without a direction raises SearchError.'''
    # SOLR doesn't like that we didn't specify 'asc' or 'desc'.
    # The SOLR error is 'Missing sort order' or 'Missing_sort_order',
    # depending on the solr version.
    incomplete_sort = {'sort': 'metadata_modified'}
    nose.tools.assert_raises(
        SearchError,
        helpers.call_action, 'package_search', **incomplete_sort)

def test_facets(self):
    '''Organization facet items carry the organization's title.'''
    owner = factories.Organization(name='test-org-facet', title='Test Org')
    for _ in range(2):
        factories.Dataset(owner_org=owner['id'])

    found = helpers.call_action('package_search',
                                **{'facet.field': ['organization']})

    expected_item = {'count': 2,
                     'display_name': u'Test Org',
                     'name': 'test-org-facet'}
    expected_facets = {'organization': {'items': [expected_item],
                                        'title': 'organization'}}
    eq(found['count'], 2)
    eq(found['search_facets'], expected_facets)

def test_facet_limit(self):
    '''``facet.limit`` of 1 keeps only the group facet with most hits.'''
    busy_group = factories.Group(name='test-group-fl1', title='Test Group 1')
    quiet_group = factories.Group(name='test-group-fl2', title='Test Group 2')
    busy_ref = {'name': busy_group['name']}
    quiet_ref = {'name': quiet_group['name']}
    factories.Dataset(groups=[busy_ref, quiet_ref])
    factories.Dataset(groups=[busy_ref])
    factories.Dataset()

    query = {'facet.field': ['groups'], 'facet.limit': 1}
    found = helpers.call_action('package_search', **query)

    group_facet = found['search_facets']['groups']
    eq(len(group_facet['items']), 1)

    expected_item = {'count': 2,
                     'display_name': u'Test Group 1',
                     'name': 'test-group-fl1'}
    eq(found['search_facets'],
       {'groups': {'items': [expected_item], 'title': 'groups'}})

def test_facet_no_limit(self):
    '''``facet.limit`` of -1 returns a facet item for every group.'''
    first_ref = {'name': factories.Group()['name']}
    second_ref = {'name': factories.Group()['name']}
    factories.Dataset(groups=[first_ref, second_ref])
    factories.Dataset(groups=[first_ref])
    factories.Dataset()

    query = {'facet.field': ['groups']}
    query['facet.limit'] = -1  # no limit
    found = helpers.call_action('package_search', **query)

    group_items = found['search_facets']['groups']['items']
    eq(len(group_items), 2)

def test_sort(self):
    '''Sorting by ``metadata_created desc`` lists newest datasets first.'''
    creation_order = ('test0', 'test1', 'test2')
    for dataset_name in creation_order:
        factories.Dataset(name=dataset_name)

    found = helpers.call_action('package_search',
                                sort='metadata_created desc')

    names_returned = [dataset['name'] for dataset in found['results']]
    eq(names_returned, [u'test2', u'test1', u'test0'])

def test_package_search_on_resource_name(self):
    '''
    package_search() should allow searching on resource name field.
    '''
    resource_name = 'resource_abc'
    # The factory is called once, for its side effect only: the return
    # value was never used, and a second identical resource added nothing
    # to what the assertion below checks.
    factories.Resource(name=resource_name)

    search_result = helpers.call_action('package_search', q='resource_abc')
    eq(search_result['results'][0]['resources'][0]['name'], resource_name)
Expand Down

0 comments on commit b4592ab

Please sign in to comment.