From 15345e3bb350505b8c1f6d804b62166675817228 Mon Sep 17 00:00:00 2001
From: Ian Murray <ian.c.murray@gmail.com>
Date: Tue, 15 Nov 2011 15:48:23 +0000
Subject: [PATCH] [solr tests] Testing that solr picks up on the flexible tags.

---
 ckan/lib/create_test_data.py               | 12 ++---
 ckan/tests/lib/test_solr_package_search.py | 57 +++++++++++++++++++++-
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/ckan/lib/create_test_data.py b/ckan/lib/create_test_data.py
index 1f36a1a1e10..21cb3940ab2 100644
--- a/ckan/lib/create_test_data.py
+++ b/ckan/lib/create_test_data.py
@@ -535,7 +535,7 @@ def get_all_data(cls):
 search_items = [{'name':'gils',
               'title':'Government Information Locator Service',
               'url':'',
-              'tags':'registry  country-usa  government  federal  gov  workshop-20081101 penguin',
+              'tags':'registry,country-usa,government,federal,gov,workshop-20081101,penguin'.split(','),
               'resources':[{'url':'http://www.dcsf.gov.uk/rsgateway/DB/SFR/s000859/SFR17_2009_tables.xls',
                           'format':'XLS',
                           'last_modified': datetime.datetime(2005,10,01),
@@ -559,7 +559,7 @@ def get_all_data(cls):
               'title':'U.S. Government Photos and Graphics',
               'url':'http://www.usa.gov/Topics/Graphics.shtml',
               'download_url':'http://www.usa.gov/Topics/Graphics.shtml',
-              'tags':'images  graphics  photographs  photos  pictures  us  usa  america  history  wildlife  nature  war  military  todo-split  gov penguin',
+              'tags':'images,graphics,photographs,photos,pictures,us,usa,america,history,wildlife,nature,war,military,todo split,gov,penguin'.split(','),
               'groups':'ukgov test1 penguin',
               'license':'other-open',
               'notes':'''## About
@@ -575,7 +575,7 @@ def get_all_data(cls):
               'title':'Text of US Federal Cases',
               'url':'http://bulk.resource.org/courts.gov/',
               'download_url':'http://bulk.resource.org/courts.gov/',
-              'tags':'us  courts  case-law  us  courts  case-law  gov  legal  law  access-bulk  penguins penguin',
+              'tags':'us,courts,case-law,us,courts,case-law,gov,legal,law,access-bulk,penguins,penguin'.split(','),
               'groups':'ukgov test2 penguin',
               'license':'cc-zero',
               'notes':'''### Description
@@ -590,7 +590,7 @@ def get_all_data(cls):
               },
              {'name':'uk-government-expenditure',
               'title':'UK Government Expenditure',
-              'tags':'workshop-20081101  uk  gov  expenditure  finance  public  funding penguin',
+              'tags':'workshop-20081101,uk,gov,expenditure,finance,public,funding,penguin'.split(','),
               'groups':'ukgov penguin',              
               'notes':'''Discussed at [Workshop on Public Information, 2008-11-02](http://okfn.org/wiki/PublicInformation).
 
@@ -601,7 +601,7 @@ def get_all_data(cls):
               'title':'Sweden - Government Offices of Sweden - Publications',
               'url':'http://www.sweden.gov.se/sb/d/574',
               'groups':'penguin',              
-              'tags':'country-sweden  format-pdf  access-www  documents  publications  government  eutransparency penguin',
+              'tags':u'country-sweden,format-pdf,access-www,documents,publications,government,eutransparency,penguin,CAPITALS,surprise!,greek omega \u03a9,strange character \u0489'.split(','),
               'license':'',
               'notes':'''### About
 
@@ -617,7 +617,7 @@ def get_all_data(cls):
               'groups':'penguin',              
               'url':'http://www.opengov.se/',
               'download_url':'http://www.opengov.se/data/open/',
-              'tags':'country-sweden  government  data penguin',
+              'tags':'country-sweden,government,data,penguin'.split(','),
               'license':'cc-by-sa',
               'notes':'''### About
 
diff --git a/ckan/tests/lib/test_solr_package_search.py b/ckan/tests/lib/test_solr_package_search.py
index 0c7dea8c089..6bebfd05044 100644
--- a/ckan/tests/lib/test_solr_package_search.py
+++ b/ckan/tests/lib/test_solr_package_search.py
@@ -118,12 +118,49 @@ def test_tags_field(self):
         result = search.query_for(model.Package).run({'q': u'country-sweden'})
         assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
 
+    def test_tags_field_split_word(self):
+        result = search.query_for(model.Package).run({'q': u'todo split'})
+        assert self._check_entity_names(result, ['us-gov-images']), self._pkg_names(result)
+
+    def test_tags_field_with_capitals(self):
+        result = search.query_for(model.Package).run({'q': u'CAPITALS'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+
+    def test_tags_field_with_the_special_character_exclamation_mark(self):
+        """
+        Asserts that a search for "surprise\\!" picks up the package with the "surprise!" tag.
+
+        The reason for escaping the exclamation mark is that '!' is a special
+        character in the solr query syntax.  Since we want to remain able to
+        run arbitrary solr searches through the package search, we can't
+        escape any special characters automatically, as they may be part of
+        a genuine solr query expression.
+
+        The reason for this test is two-fold:
+
+         1. It asserts that it's possible to find a package with a special
+            character within it, as long as the query is correct.
+
+         2. It demonstrates that special solr characters need to be escaped
+            in the query.
+        """
+        result = search.query_for(model.Package).run({'q': u'surprise\\!'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+
+    def dont_test_tags_field_with_basic_unicode(self):
+        result = search.query_for(model.Package).run({'q': u'greek omega \u03a9'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+        
     def test_tags_token_simple(self):
         result = search.query_for(model.Package).run({'q': u'tags:country-sweden'})
         assert self._check_entity_names(result, ['se-publications', 'se-opengov']), self._pkg_names(result)
         result = search.query_for(model.Package).run({'q': u'tags:wildlife'})
         assert self._pkg_names(result) == 'us-gov-images', self._pkg_names(result)
 
+    def test_tags_token_with_multi_word_tag(self):
+        result = search.query_for(model.Package).run({'q': u'tags:"todo split"'})
+        assert self._check_entity_names(result, ['us-gov-images']), self._pkg_names(result)
+    
     def test_tags_token_simple_with_deleted_tag(self):
         # registry has been deleted
         result = search.query_for(model.Package).run({'q': u'tags:registry'})
@@ -132,11 +169,25 @@ def test_tags_token_simple_with_deleted_tag(self):
     def test_tags_token_multiple(self):
         result = search.query_for(model.Package).run({'q': u'tags:country-sweden tags:format-pdf'})
         assert self._pkg_names(result) == 'se-publications', self._pkg_names(result)
+        result = search.query_for(model.Package).run({'q': u'tags:"todo split" tags:war'})
+        assert self._pkg_names(result) == 'us-gov-images', self._pkg_names(result)
 
     def test_tags_token_complicated(self):
         result = search.query_for(model.Package).run({'q': u'tags:country-sweden tags:somethingrandom'})
         assert self._pkg_names(result) == '', self._pkg_names(result)
 
+    def test_tags_token_with_capitals(self):
+        result = search.query_for(model.Package).run({'q': u'tags:"CAPITALS"'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+
+    def test_tags_token_with_punctuation(self):
+        result = search.query_for(model.Package).run({'q': u'tags:"surprise!"'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+
+    def dont_test_tags_token_with_basic_unicode(self):
+        result = search.query_for(model.Package).run({'q': u'tags:"greek omega \u03a9"'})
+        assert self._check_entity_names(result, ['se-publications']), self._pkg_names(result)
+        
     def test_pagination(self):
         # large search
         all_results = search.query_for(model.Package).run({'q': self.q_all})
@@ -300,6 +351,8 @@ def test_overall(self):
         self._check_search_results('groups:david', 2)
         self._check_search_results('groups:roger', 1)
         self._check_search_results('groups:lenny', 0)
+        self._check_search_results('tags:"russian"', 2)
+        self._check_search_results(u'tags:"Flexible \u0489!"', 2)
         
 
 class TestGeographicCoverage(TestController):
@@ -419,10 +472,10 @@ def setup_class(cls):
         setup_test_search_index()
         init_data = [{'name':u'test1-penguin-canary',
                       'title':u'penguin',
-                      'tags':u'canary goose squirrel wombat wombat'},
+                      'tags':u'canary goose squirrel wombat wombat'.split()},
                      {'name':u'test2-squirrel-squirrel-canary-goose',
                       'title':u'squirrel goose',
-                      'tags':u'penguin wombat'},
+                      'tags':u'penguin wombat'.split()},
                      ]
         CreateTestData.create_arbitrary(init_data)
         cls.pkg_names = [