Skip to content

Commit

Permalink
Merge pull request #4191 from smotornyuk/solr-package-autocomplete
Browse files Browse the repository at this point in the history
[#3993] package_autocomplete use solr
  • Loading branch information
wardi committed Dec 4, 2018
2 parents 99bd644 + 679d0cb commit 17577dd
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 19 deletions.
18 changes: 17 additions & 1 deletion ckan/config/solr/schema.xml
Expand Up @@ -24,7 +24,7 @@
<!-- We update the version when there is a backward-incompatible change to this
schema. In this case the version should be set to the next CKAN version number.
(x.y but not x.y.z since it needs to be a float) -->
<schema name="ckan" version="2.8">
<schema name="ckan" version="2.9">

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
Expand Down Expand Up @@ -81,6 +81,18 @@ schema. In this case the version should be set to the next CKAN version number.
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<fieldType name="text_ngram" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.NGramTokenizerFactory" minGramSize="2" maxGramSize="10"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.EdgeNGramTokenizerFactory" minGramSize="2" maxGramSize="10"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

</types>


Expand All @@ -89,10 +101,12 @@ schema. In this case the version should be set to the next CKAN version number.
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="title_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="dataset_type" type="string" indexed="true" stored="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
Expand Down Expand Up @@ -165,6 +179,8 @@ schema. In this case the version should be set to the next CKAN version number.
<solrQueryParser defaultOperator="AND"/>

<copyField source="url" dest="urls"/>
<copyField source="title" dest="title_ngram"/>
<copyField source="name" dest="name_ngram"/>
<copyField source="ckan_url" dest="urls"/>
<copyField source="download_url" dest="urls"/>
<copyField source="res_url" dest="urls"/>
Expand Down
2 changes: 1 addition & 1 deletion ckan/lib/search/__init__.py
Expand Up @@ -31,7 +31,7 @@ def text_traceback():
return res


SUPPORTED_SCHEMA_VERSIONS = ['2.8']
SUPPORTED_SCHEMA_VERSIONS = ['2.8', '2.9']

DEFAULT_OPTIONS = {
'limit': 20,
Expand Down
42 changes: 27 additions & 15 deletions ckan/logic/action/get.py
Expand Up @@ -1515,34 +1515,46 @@ def package_autocomplete(context, data_dict):
:rtype: list of dictionaries
'''
model = context['model']

_check_access('package_autocomplete', context, data_dict)
user = context.get('user')

limit = data_dict.get('limit', 10)
q = data_dict['q']

like_q = u"%s%%" % q
# enforce permission filter based on user
if context.get('ignore_auth') or (user and authz.is_sysadmin(user)):
labels = None
else:
labels = lib_plugins.get_permission_labels().get_user_dataset_labels(
context['auth_user_obj']
)

data_dict = {
'q': ' OR '.join([
'name_ngram:{0}',
'title_ngram:{0}',
'name:{0}',
'title:{0}',
]).format(search.query.solr_literal(q)),
'fl': ['name', 'title'],
'rows': limit
}
query = search.query_for(model.Package)

query = model.Session.query(model.Package)
query = query.filter(model.Package.state == 'active')
query = query.filter(model.Package.private == False)
query = query.filter(_or_(model.Package.name.ilike(like_q),
model.Package.title.ilike(like_q)))
query = query.limit(limit)
results = query.run(data_dict, permission_labels=labels)['results']

q_lower = q.lower()
pkg_list = []
for package in query:
if package.name.startswith(q_lower):
for package in results:
if q_lower in package['name']:
match_field = 'name'
match_displayed = package.name
match_displayed = package['name']
else:
match_field = 'title'
match_displayed = '%s (%s)' % (package.title, package.name)
match_displayed = '%s (%s)' % (package['title'], package['name'])
result_dict = {
'name': package.name,
'title': package.title,
'name': package['name'],
'title': package['title'],
'match_field': match_field,
'match_displayed': match_displayed}
pkg_list.append(result_dict)
Expand Down
33 changes: 31 additions & 2 deletions ckan/tests/logic/action/test_get.py
Expand Up @@ -923,10 +923,39 @@ def test_package_autocomplete_does_not_return_private_datasets(self):
dataset2 = factories.Dataset(user=user, owner_org=org['name'],
private=True, title='Some private stuff')

package_list = helpers.call_action('package_autocomplete',
q='some')
package_list = helpers.call_action(
'package_autocomplete', context={'ignore_auth': False}, q='some'
)
eq(len(package_list), 1)

def test_package_autocomplete_does_return_private_datasets_from_my_org(self):
# Checks that package_autocomplete, called as a member of an organization
# with auth enforced, returns that organization's private dataset as well
# as its public one.
user = factories.User()
# The user is registered on the organization with capacity 'member'.
org = factories.Organization(
users=[{'name': user['name'], 'capacity': 'member'}]
)
# One public and one private dataset, both owned by the organization and
# both with a title starting with "Some".
factories.Dataset(
user=user, owner_org=org['id'], title='Some public stuff'
)
factories.Dataset(
user=user, owner_org=org['id'], private=True,
title='Some private stuff'
)
# ignore_auth is set to False explicitly and the call is made as the member
# user, so the action's permission filtering applies to this user.
package_list = helpers.call_action(
'package_autocomplete',
context={'user': user['name'], 'ignore_auth': False},
q='some'
)
# Both datasets (public and private) are expected in the results.
eq(len(package_list), 2)

def test_package_autocomplete_works_for_the_middle_part_of_title(self):
# Checks that package_autocomplete matches a query against text in the
# middle of a dataset title, not only against the beginning of it.
factories.Dataset(title='Some public stuff')
factories.Dataset(title='Some random stuff')

# 'bli' occurs only inside "public", so exactly one dataset is expected.
# NOTE(review): assumes the factory-generated dataset names do not contain
# 'bli' or 'tuf' -- confirm against the Dataset factory.
package_list = helpers.call_action('package_autocomplete', q='bli')
eq(len(package_list), 1)
# 'tuf' occurs inside "stuff", which is in both titles, so two are expected.
package_list = helpers.call_action('package_autocomplete', q='tuf')
eq(len(package_list), 2)


class TestPackageSearch(helpers.FunctionalTestBase):

Expand Down

0 comments on commit 17577dd

Please sign in to comment.