From d074ef2b4dd8988053b4b3c9eaf411fc331a7922 Mon Sep 17 00:00:00 2001 From: Ian Murray Date: Tue, 22 Nov 2011 12:05:32 +0000 Subject: [PATCH] [tagname search] Made /tag search case-insensitive. - This affects tagname autocomplete - Also fixed up a bug where '_' and '%' weren't being escaped in SQL LIKE expressions --- ckan/logic/action/get.py | 4 +- ckan/model/misc.py | 17 ++++++++ ckan/tests/functional/api/test_action.py | 27 +++++++------ ckan/tests/lib/test_solr_package_search.py | 7 +++- ckan/tests/lib/test_tag_search.py | 4 +- ckan/tests/models/test_misc.py | 45 ++++++++++++++++++++++ 6 files changed, 85 insertions(+), 19 deletions(-) create mode 100644 ckan/model/misc.py diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 0f99d5297ce..064bf3a330f 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -6,6 +6,7 @@ import ckan from ckan.logic import NotFound from ckan.logic import check_access +from ckan.model import misc from ckan.plugins import (PluginImplementations, IGroupController, IPackageController) @@ -789,7 +790,8 @@ def tag_search(context, data_dict): return for term in terms: - q = q.filter(model.Tag.name.contains(term)) + escaped_term = misc.escape_sql_like_special_characters(term, escape='\\') + q = q.filter(model.Tag.name.ilike('%' + escaped_term + '%')) count = q.count() q = q.offset(offset) diff --git a/ckan/model/misc.py b/ckan/model/misc.py new file mode 100644 index 00000000000..260e3a2a106 --- /dev/null +++ b/ckan/model/misc.py @@ -0,0 +1,17 @@ +""" +Contains miscelaneous set of DB-related functions +""" + +import re + +_special_characters = '%_' +def escape_sql_like_special_characters(term, escape='\\'): + """ + Escapes characters that are special to the the sql LIKE expression. + + In particular, for both postgres and sqlite this means '%' and '_'. + """ + for ch in escape + _special_characters: + term = term.replace(ch, escape+ch) + return term + diff --git a/ckan/tests/functional/api/test_action.py b/ckan/tests/functional/api/test_action.py index 180a95ade98..9f91aad20ac 100644 --- a/ckan/tests/functional/api/test_action.py +++ b/ckan/tests/functional/api/test_action.py @@ -663,13 +663,6 @@ def test_15_tag_autocomplete_tag_with_punctuation(self): def test_15_tag_autocomplete_tag_with_capital_letters(self): """ Asserts autocomplete finds tags that contain capital letters - - Note : the only reason this test passes for now is that the - search is for a lower cases substring. If we had searched - for "CAPITAL" then it would fail. This is because currently - the search API lower cases all search terms. There's a - sister test (`test_15_tag_autocomplete_search_with_capital_letters`) - that tests that functionality. """ CreateTestData.create_arbitrary([{ @@ -738,13 +731,6 @@ def test_15_tag_autocomplete_search_with_punctuation(self): def test_15_tag_autocomplete_search_with_capital_letters(self): """ Asserts that a search term containing capital letters works correctly - - NOTE - this test FAILS. This is because I haven't implemented the - search side of flexible tags yet. - - NOTE - when this test is fixed, remove the NOTE in - `test_15_tag_autocomplete_tag_with_capital_letters` that - references this one. """ CreateTestData.create_arbitrary([{ @@ -759,6 +745,19 @@ def test_15_tag_autocomplete_search_with_capital_letters(self): assert res_obj['success'] assert 'CAPITAL idea old chap' in res_obj['result'], res_obj['result'] + def test_15_tag_autocomplete_is_case_insensitive(self): + CreateTestData.create_arbitrary([{ + 'name': u'package-with-tag-that-has-a-capital-letter-3', + 'tags': [u'MIX of CAPITALS and LOWER case'], + 'license': 'never_heard_of_it', + }]) + + postparams = '%s=1' % json.dumps({'q':u'lower case'}) + res = self.app.post('/api/action/tag_autocomplete', params=postparams) + res_obj = json.loads(res.body) + assert res_obj['success'] + assert 'MIX of CAPITALS and LOWER case' in res_obj['result'], res_obj['result'] + def test_16_user_autocomplete(self): #Empty query postparams = '%s=1' % json.dumps({}) diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py index 8ec00899f18..5d18bcb1faf 100644 --- a/ckan/tests/lib/test_solr_package_search.py +++ b/ckan/tests/lib/test_solr_package_search.py @@ -164,10 +164,10 @@ def test_tags_token_with_punctuation(self): result = search.query_for(model.Package).run({'q': u'tags:"surprise."'}) assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) - def dont_test_tags_token_with_basic_unicode(self): + def test_tags_token_with_basic_unicode(self): result = search.query_for(model.Package).run({'q': u'tags:"greek omega \u03a9"'}) assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) - + def test_pagination(self): # large search all_results = search.query_for(model.Package).run({'q': self.q_all}) @@ -333,6 +333,9 @@ def test_overall(self): self._check_search_results('groups:lenny', 0) self._check_search_results('tags:"russian"', 2) self._check_search_results(u'tags:"Flexible \u30a1"', 2) + self._check_search_results(u'Flexible \u30a1', 2) + self._check_search_results(u'Flexible', 2) + self._check_search_results(u'flexible', 2) class TestGeographicCoverage(TestController): diff --git a/ckan/tests/lib/test_tag_search.py b/ckan/tests/lib/test_tag_search.py index 3ba9c5a6385..9047e13e5aa 100644 --- a/ckan/tests/lib/test_tag_search.py +++ b/ckan/tests/lib/test_tag_search.py @@ -51,9 +51,9 @@ def test_search_with_unicode_in_search_query(self): result = search.query_for(model.Tag).run(query=u' \u30a1') assert u'Flexible \u30a1' in result['results'] - def test_search_is_case_sensitive(self): + def test_search_is_case_insensitive(self): result = search.query_for(model.Tag).run(query=u'flexible') - assert u'Flexible \u30a1' not in result['results'] + assert u'Flexible \u30a1' in result['results'] def test_good_search_fields(self): diff --git a/ckan/tests/models/test_misc.py b/ckan/tests/models/test_misc.py index c5e76fda709..2f73b4771af 100644 --- a/ckan/tests/models/test_misc.py +++ b/ckan/tests/models/test_misc.py @@ -1,5 +1,8 @@ +from nose.tools import assert_equal + from ckan.tests import * import ckan.model as model +from ckan.model.misc import escape_sql_like_special_characters class TestRevisionExtraAttributes: @classmethod @@ -23,3 +26,45 @@ def test_revision_user(self): assert rev.user is not None, rev assert rev.user.name == rev.author +_sql_escape = escape_sql_like_special_characters +class TestEscapeSqlLikeCharacters(object): + """ + Tests for model.misc.escape_sql_like_special_characters + """ + + def test_identity(self): + """Asserts that it escapes nothing if nothing needs escaping""" + terms = ['', + 'word', + 'two words'] + for term, expected_term in zip(terms, terms): + assert_equal(_sql_escape(term), expected_term) + + def test_escape_chararacter_is_escaped(self): + """Asserts that the escape character is escaped""" + term = r'backslash \ character' + assert_equal (_sql_escape(term, escape='\\'), + r'backslash \\ character') + + term = 'surprise!' + assert_equal (_sql_escape(term, escape='!'), + r'surprise!!') + + def test_default_escape_character_is_a_backslash(self): + """Asserts that the default escape character is the backslash""" + term = r'backslash \ character' + assert_equal (_sql_escape(term), + r'backslash \\ character') + + def test_sql_like_special_characters_are_escaped(self): + """Asserts that '%' and '_' are escaped correctly""" + terms = [ + (r'percents %', r'percents \%'), + (r'underscores _', r'underscores \_'), + (r'backslash \ ', r'backslash \\ '), + (r'all three \ _%', r'all three \\ \_\%'), + ] + + for term, expected_result in terms: + assert_equal(_sql_escape(term), expected_result) +