From 15345e3bb350505b8c1f6d804b62166675817228 Mon Sep 17 00:00:00 2001 From: Ian Murray Date: Tue, 15 Nov 2011 15:48:23 +0000 Subject: [PATCH] [solr tests] Testing that solr picks up on the flexible tags. --- ckan/lib/create_test_data.py | 12 ++--- ckan/tests/lib/test_solr_package_search.py | 57 +++++++++++++++++++++- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/ckan/lib/create_test_data.py b/ckan/lib/create_test_data.py index 1f36a1a1e10..21cb3940ab2 100644 --- a/ckan/lib/create_test_data.py +++ b/ckan/lib/create_test_data.py @@ -535,7 +535,7 @@ def get_all_data(cls): search_items = [{'name':'gils', 'title':'Government Information Locator Service', 'url':'', - 'tags':'registry country-usa government federal gov workshop-20081101 penguin', + 'tags':'registry,country-usa,government,federal,gov,workshop-20081101,penguin'.split(','), 'resources':[{'url':'http://www.dcsf.gov.uk/rsgateway/DB/SFR/s000859/SFR17_2009_tables.xls', 'format':'XLS', 'last_modified': datetime.datetime(2005,10,01), @@ -559,7 +559,7 @@ def get_all_data(cls): 'title':'U.S. 
Government Photos and Graphics', 'url':'http://www.usa.gov/Topics/Graphics.shtml', 'download_url':'http://www.usa.gov/Topics/Graphics.shtml', - 'tags':'images graphics photographs photos pictures us usa america history wildlife nature war military todo-split gov penguin', + 'tags':'images,graphics,photographs,photos,pictures,us,usa,america,history,wildlife,nature,war,military,todo split,gov,penguin'.split(','), 'groups':'ukgov test1 penguin', 'license':'other-open', 'notes':'''## About @@ -575,7 +575,7 @@ def get_all_data(cls): 'title':'Text of US Federal Cases', 'url':'http://bulk.resource.org/courts.gov/', 'download_url':'http://bulk.resource.org/courts.gov/', - 'tags':'us courts case-law us courts case-law gov legal law access-bulk penguins penguin', + 'tags':'us,courts,case-law,us,courts,case-law,gov,legal,law,access-bulk,penguins,penguin'.split(','), 'groups':'ukgov test2 penguin', 'license':'cc-zero', 'notes':'''### Description @@ -590,7 +590,7 @@ def get_all_data(cls): }, {'name':'uk-government-expenditure', 'title':'UK Government Expenditure', - 'tags':'workshop-20081101 uk gov expenditure finance public funding penguin', + 'tags':'workshop-20081101,uk,gov,expenditure,finance,public,funding,penguin'.split(','), 'groups':'ukgov penguin', 'notes':'''Discussed at [Workshop on Public Information, 2008-11-02](http://okfn.org/wiki/PublicInformation). 
@@ -601,7 +601,7 @@ def get_all_data(cls): 'title':'Sweden - Government Offices of Sweden - Publications', 'url':'http://www.sweden.gov.se/sb/d/574', 'groups':'penguin', - 'tags':'country-sweden format-pdf access-www documents publications government eutransparency penguin', + 'tags':u'country-sweden,format-pdf,access-www,documents,publications,government,eutransparency,penguin,CAPITALS,surprise!,greek omega \u03a9,strange character \u0489'.split(','), 'license':'', 'notes':'''### About @@ -617,7 +617,7 @@ def get_all_data(cls): 'groups':'penguin', 'url':'http://www.opengov.se/', 'download_url':'http://www.opengov.se/data/open/', - 'tags':'country-sweden government data penguin', + 'tags':'country-sweden,government,data,penguin'.split(','), 'license':'cc-by-sa', 'notes':'''### About diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py index 0c7dea8c089..6bebfd05044 100644 --- a/ckan/tests/lib/test_solr_package_search.py +++ b/ckan/tests/lib/test_solr_package_search.py @@ -118,12 +118,49 @@ def test_tags_field(self): result = search.query_for(model.Package).run({'q': u'country-sweden'}) assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result) + def test_tags_field_split_word(self): + result = search.query_for(model.Package).run({'q': u'todo split'}) + assert self._check_entity_names(result, ['us-gov-images']), self._pkg_names(result) + + def test_tags_field_with_capitals(self): + result = search.query_for(model.Package).run({'q': u'CAPITALS'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + + def test_tags_field_with_the_special_character_exclamation_mark(self): + """ + Asserts that a search for "surprise\\!" picks up the package with the "surprise!" tag. + + The reason for escaping the exclamation mark is that '!' is a special + character in the solr query syntax. 
And since we want to maintain being + able to run arbitrary solr searches through the package search, we + can't escape any special characters automatically. (As they may be + part of a genuine solr query expression). + + The reason for this test is two-fold: + + 1. It asserts that it's possible to find a package with a special + character within it, as long as the query is correct. + + 2. It exhibits this behaviour of it being necessary to escape special + solr characters. + """ + result = search.query_for(model.Package).run({'q': u'surprise\\!'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + + def dont_test_tags_field_with_basic_unicode(self): + result = search.query_for(model.Package).run({'q': u'greek omega \u03a9'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + def test_tags_token_simple(self): result = search.query_for(model.Package).run({'q': u'tags:country-sweden'}) assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result) result = search.query_for(model.Package).run({'q': u'tags:wildlife'}) assert self._pkg_names(result) == 'us-gov-images', self._pkg_names(result) + def test_tags_token_with_multi_word_tag(self): + result = search.query_for(model.Package).run({'q': u'tags:"todo split"'}) + assert self._check_entity_names(result, ['us-gov-images']), self._pkg_names(result) + def test_tags_token_simple_with_deleted_tag(self): # registry has been deleted result = search.query_for(model.Package).run({'q': u'tags:registry'}) @@ -132,11 +169,25 @@ def test_tags_token_simple_with_deleted_tag(self): def test_tags_token_multiple(self): result = search.query_for(model.Package).run({'q': u'tags:country-sweden tags:format-pdf'}) assert self._pkg_names(result) == 'se-publications', self._pkg_names(result) + result = search.query_for(model.Package).run({'q': u'tags:"todo split" tags:war'}) + assert self._pkg_names(result) == 'us-gov-images', 
self._pkg_names(result) def test_tags_token_complicated(self): result = search.query_for(model.Package).run({'q': u'tags:country-sweden tags:somethingrandom'}) assert self._pkg_names(result) == '', self._pkg_names(result) + def test_tags_token_with_capitals(self): + result = search.query_for(model.Package).run({'q': u'tags:"CAPITALS"'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + + def test_tags_token_with_punctuation(self): + result = search.query_for(model.Package).run({'q': u'tags:"surprise!"'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + + def dont_test_tags_token_with_basic_unicode(self): + result = search.query_for(model.Package).run({'q': u'tags:"greek omega \u03a9"'}) + assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result) + def test_pagination(self): # large search all_results = search.query_for(model.Package).run({'q': self.q_all}) @@ -300,6 +351,8 @@ def test_overall(self): self._check_search_results('groups:david', 2) self._check_search_results('groups:roger', 1) self._check_search_results('groups:lenny', 0) + self._check_search_results('tags:"russian"', 2) + self._check_search_results(u'tags:"Flexible \u0489!"', 2) class TestGeographicCoverage(TestController): @@ -419,10 +472,10 @@ def setup_class(cls): setup_test_search_index() init_data = [{'name':u'test1-penguin-canary', 'title':u'penguin', - 'tags':u'canary goose squirrel wombat wombat'}, + 'tags':u'canary goose squirrel wombat wombat'.split()}, {'name':u'test2-squirrel-squirrel-canary-goose', 'title':u'squirrel goose', - 'tags':u'penguin wombat'}, + 'tags':u'penguin wombat'.split()}, ] CreateTestData.create_arbitrary(init_data) cls.pkg_names = [