[#1067] Resolve issues with % in column names in search

ckan · Jul 3, 2013 · 773b0e1 · 773b0e1
1 parent 45cb716
commit 773b0e1
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 30 deletions.
diff --git a/ckanext/datastore/db.py b/ckanext/datastore/db.py
@@ -334,14 +334,14 @@ def create_table(context, data_dict):
 
     fields = datastore_fields + supplied_fields + extra_fields
     sql_fields = u", ".join([u'"{0}" {1}'.format(
-        f['id'].replace('%', '%%'), f['type']) for f in fields])
+        f['id'], f['type']) for f in fields])
 
     sql_string = u'CREATE TABLE "{0}" ({1});'.format(
         data_dict['resource_id'],
         sql_fields
     )
 
-    context['connection'].execute(sql_string)
+    context['connection'].execute(sql_string.replace('%', '%%'))
 
 
 def _get_aliases(context, data_dict):
@@ -454,16 +454,17 @@ def generate_index_name():
                             index)]
                 })
         fields_string = u', '.join(
-            ['(("{0}").json::text)'.format(field.replace('%', '%%'))
+            ['(("{0}").json::text)'.format(field)
                 if field in json_fields else
-                '"%s"' % field.replace('%', '%%')
+                '"%s"' % field
                 for field in index_fields])
         sql_index_strings.append(sql_index_string.format(
             res_id=data_dict['resource_id'],
             unique='unique' if index == primary_key else '',
             name=generate_index_name(),
             fields=fields_string))
 
+    sql_index_strings = map(lambda x: x.replace('%', '%%'), sql_index_strings)
     map(context['connection'].execute, sql_index_strings)
 
 
@@ -546,9 +547,9 @@ def alter_table(context, data_dict):
     for field in new_fields:
         sql = 'ALTER TABLE "{0}" ADD "{1}" {2}'.format(
             data_dict['resource_id'],
-            field['id'].replace('%', '%%'),
+            field['id'],
             field['type'])
-        context['connection'].execute(sql)
+        context['connection'].execute(sql.replace('%', '%%'))
 
 
 def insert_data(context, data_dict):
@@ -914,9 +915,13 @@ def search_data(context, data_dict):
         rank=rank_column,
         resource=data_dict['resource_id'],
         ts_query=ts_query,
-        where=where_clause,
-        sort=sort, limit=limit, offset=offset)
-    results = context['connection'].execute(sql_string, [where_values])
+        where='{where}',
+        sort=sort,
+        limit=limit,
+        offset=offset)
+    sql_string = sql_string.replace('%', '%%')
+    results = context['connection'].execute(
+        sql_string.format(where=where_clause), [where_values])
 
     _insert_links(data_dict, limit, offset)
     return format_results(context, results, data_dict)
@@ -1119,7 +1124,12 @@ def search(context, data_dict):
                 'query': ['Search took too long']
             })
         raise ValidationError({
-            'query': ['Invalid query']
+            'query': ['Invalid query'],
+            'info': {
+                'statement': [e.statement],
+                'params': [e.params],
+                'orig': [str(e.orig)]
+            }
         })
     finally:
         context['connection'].close()

diff --git a/ckanext/datastore/tests/test_create.py b/ckanext/datastore/tests/test_create.py
@@ -79,7 +79,7 @@ def test_create_invalid_alias_name(self):
 
         data = {
             'resource_id': resource.id,
-            'aliases': u'fo%25bar',
+            'aliases': u'fo%25bar',  # alias with percent
             'fields': [{'id': 'book', 'type': 'text'},
                        {'id': 'author', 'type': 'text'}]
         }
@@ -294,7 +294,7 @@ def test_create_basic(self):
         data = {
             'resource_id': resource.id,
             'aliases': aliases,
-            'fields': [{'id': 'boo%k', 'type': 'text'},
+            'fields': [{'id': 'boo%k', 'type': 'text'},  # column with percent
                        {'id': 'author', 'type': 'json'}],
             'indexes': [['boo%k', 'author'], 'author'],
             'records': [{'boo%k': 'crime', 'author': ['tolstoy', 'dostoevsky']},
@@ -362,7 +362,7 @@ def test_create_basic(self):
         res = self.app.post('/api/action/resource_show', params=postparams,
                             extra_environ=auth)
         res_dict = json.loads(res.body)
-        assert res_dict['result']['datastore_active'] == True
+        assert res_dict['result']['datastore_active']
 
         #######  insert again simple
         data2 = {
@@ -514,10 +514,11 @@ def test_create_basic(self):
 
         assert res_dict['success'] is True, res_dict
 
-        #######  insert with paramter id rather than resource_id which is a shortcut
+        #######  insert with parameter id rather than resource_id which is a shortcut
         data8 = {
             'id': resource.id,
-            'records': [{'boo%k': 'warandpeace'}]
+            # insert a record whose value contains a literal percent sign
+            'records': [{'boo%k': 'warandpeace', 'author': '99% good'}]
         }
 
         postparams = '%s=1' % json.dumps(data8)

diff --git a/ckanext/datastore/tests/test_search.py b/ckanext/datastore/tests/test_search.py
@@ -34,12 +34,14 @@ def setup_class(cls):
             'fields': [{'id': u'b\xfck', 'type': 'text'},
                        {'id': 'author', 'type': 'text'},
                        {'id': 'published'},
-                       {'id': u'characters', u'type': u'_text'}],
+                       {'id': u'characters', u'type': u'_text'},
+                       {'id': 'rating with %'}],
             'records': [{u'b\xfck': 'annakarenina', 'author': 'tolstoy',
                         'published': '2005-03-01', 'nested': ['b', {'moo': 'moo'}],
-                        u'characters': [u'Princess Anna', u'Sergius']},
+                        u'characters': [u'Princess Anna', u'Sergius'],
+                        'rating with %': '60%'},
                         {u'b\xfck': 'warandpeace', 'author': 'tolstoy',
-                        'nested': {'a': 'b'}}
+                        'nested': {'a': 'b'}, 'rating with %': '99%'}
                        ]
         }
         postparams = '%s=1' % json.dumps(cls.data)
@@ -54,13 +56,15 @@ def setup_class(cls):
                                  u'nested': [u'b', {u'moo': u'moo'}],
                                  u'b\xfck': u'annakarenina',
                                  u'author': u'tolstoy',
-                                 u'characters': [u'Princess Anna', u'Sergius']},
+                                 u'characters': [u'Princess Anna', u'Sergius'],
+                                 u'rating with %': u'60%'},
                                 {u'published': None,
                                  u'_id': 2,
                                  u'nested': {u'a': u'b'},
                                  u'b\xfck': u'warandpeace',
                                  u'author': u'tolstoy',
-                                 u'characters': None}]
+                                 u'characters': None,
+                                 u'rating with %': u'99%'}]
 
         engine = db._get_engine(
                 None,
@@ -342,9 +346,10 @@ def test_search_full_text(self):
         result = res_dict['result']
         assert result['total'] == 1
 
-        results = [extract(result['records'][0],
-            [u'_id', u'author', u'b\xfck', u'nested', u'published', u'characters'])]
-        assert results == [self.expected_records[0]], result['records']
+        results = [extract(result['records'][0], [
+            u'_id', u'author', u'b\xfck', u'nested',
+            u'published', u'characters', u'rating with %'])]
+        assert results == [self.expected_records[0]], results['records']
 
         data = {'resource_id': self.data['resource_id'],
                 'q': 'tolstoy'}
@@ -356,9 +361,10 @@ def test_search_full_text(self):
         result = res_dict['result']
         assert result['total'] == 2
         results = [extract(
-                record,
-                [u'_id', u'author', u'b\xfck', u'nested', u'published', u'characters']
-            ) for record in result['records']]
+            record,
+            [u'_id', u'author', u'b\xfck', u'nested',
+             u'published', u'characters', u'rating with %']
+        ) for record in result['records']]
         assert results == self.expected_records, result['records']
 
         expected_fields = [{u'type': u'int4', u'id': u'_id'},
@@ -382,10 +388,10 @@ def test_search_full_text(self):
         result = res_dict['result']
         assert result['total'] == 1
         results = [extract(
-                result['records'][0],
-                [u'_id', u'author', u'b\xfck', u'nested', u'published', u'characters']
-            )]
-        assert results == [self.expected_records[0]], result['records']
+            result['records'][0],
+            [u'_id', u'author', u'b\xfck', u'nested', u'published',
+             u'characters', u'rating with %'])]
+        assert results == [self.expected_records[0]], results['records']
 
         for field in expected_fields:
             assert field in result['fields'], field