Skip to content

Commit

Permalink
Merge pull request #1967 from aliceh75/1966-full-text-search-on-integ…
Browse files Browse the repository at this point in the history
…er-fails

unicode_or_json_validator should assume literals are unicode rather than json
  • Loading branch information
wardi committed Nov 3, 2014
2 parents 0bbfcc1 + 4a6f1ab commit cf68e74
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 10 deletions.
6 changes: 4 additions & 2 deletions ckanext/datastore/db.py
Expand Up @@ -752,12 +752,14 @@ def _validate_record(record, num, field_names):

def _to_full_text(fields, record):
full_text = []
ft_types = ['int8', 'int4', 'int2', 'float4', 'float8', 'date', 'time',
'timetz', 'timestamp', 'numeric', 'text']
for field in fields:
value = record.get(field['id'])
if field['type'].lower() == 'nested' and value:
full_text.extend(json_get_values(value))
elif field['type'].lower() == 'text' and value:
full_text.append(value)
elif field['type'].lower() in ft_types and str(value):
full_text.append(str(value))
return ' '.join(full_text)


Expand Down
21 changes: 19 additions & 2 deletions ckanext/datastore/logic/schema.py
Expand Up @@ -56,7 +56,13 @@ def list_of_strings_or_string(key, data, errors, context):


def json_validator(value, context):
if isinstance(value, dict) or isinstance(value, list):
'''Validate and parse a JSON value.
dicts and lists will be returned untouched, while other values
will be run through a JSON parser before being returned. If the
parsing fails, raise an Invalid exception.
'''
if isinstance(value, (list, dict)):
return value
try:
value = json.loads(value)
Expand All @@ -66,10 +72,21 @@ def json_validator(value, context):


def unicode_or_json_validator(value, context):
    '''Return a parsed JSON object when applicable, a unicode string when not.

    None is returned untouched. Otherwise the value is run through
    json_validator; if the result is a dict it is returned. In every other
    case -- the value is not valid JSON, or it parses to something that is
    not a dict (a number, string, boolean, list, ...) -- unicode(value) is
    returned.
    '''
    if value is None:
        return value
    try:
        v = json_validator(value, context)
    except df.Invalid:
        return unicode(value)
    # json.loads will parse literals; however we want literals as unicode.
    if isinstance(v, dict):
        return v
    return unicode(value)

Expand Down
93 changes: 87 additions & 6 deletions ckanext/datastore/tests/test_search.py
Expand Up @@ -621,12 +621,12 @@ def setup_class(cls):
{'id': 'lon'}
],
records=[
{'id': 0, 'date': '2011-01-01', 'x': 1, 'y': 2, 'z': 3, 'country': 'DE', 'title': 'first', 'lat':52.56, 'lon':13.40},
{'id': 0, 'date': '2011-01-01', 'x': 1, 'y': 2, 'z': 3, 'country': 'DE', 'title': 'first 99', 'lat':52.56, 'lon':13.40},
{'id': 1, 'date': '2011-02-02', 'x': 2, 'y': 4, 'z': 24, 'country': 'UK', 'title': 'second', 'lat':54.97, 'lon':-1.60},
{'id': 2, 'date': '2011-03-03', 'x': 3, 'y': 6, 'z': 9, 'country': 'US', 'title': 'third', 'lat':40.00, 'lon':-75.5},
{'id': 3, 'date': '2011-04-04', 'x': 4, 'y': 8, 'z': 6, 'country': 'UK', 'title': 'fourth', 'lat':57.27, 'lon':-6.20},
{'id': 4, 'date': '2011-05-04', 'x': 5, 'y': 10, 'z': 15, 'country': 'UK', 'title': 'fifth', 'lat':51.58, 'lon':0},
{'id': 5, 'date': '2011-06-02', 'x': 6, 'y': 12, 'z': 18, 'country': 'DE', 'title': 'sixth', 'lat':51.04, 'lon':7.9}
{'id': 5, 'date': '2011-06-02', 'x': 6, 'y': 12, 'z': 18, 'country': 'DE', 'title': 'sixth 53.56', 'lat':51.04, 'lon':7.9}
]
)
postparams = '%s=1' % json.dumps(cls.data)
Expand Down Expand Up @@ -662,6 +662,66 @@ def test_advanced_search_full_text(self):
res_dict = json.loads(res.body)
assert res_dict['result']['total'] == 5, pprint.pformat(res_dict)

def test_full_text_search_on_integers_within_text_strings(self):
    '''A full-text query for "99" must report exactly one match.'''
    query = {'q': '99', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['result']['total'] == 1, pprint.pformat(body)

def test_full_text_search_on_integers(self):
    '''A full-text query for "4" must report exactly three matches.'''
    query = {'q': '4', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['result']['total'] == 3, pprint.pformat(body)

def test_full_text_search_on_decimal_within_text_strings(self):
    '''A full-text query for "53.56" must report exactly one match.'''
    query = {'q': '53.56', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['result']['total'] == 1, pprint.pformat(body)

def test_full_text_search_on_decimal(self):
    '''A full-text query for "52.56" must report exactly one match.'''
    query = {'q': '52.56', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['result']['total'] == 1, pprint.pformat(body)

def test_full_text_search_on_date(self):
    '''A full-text query for "2011-01-01" must report exactly one match.'''
    query = {'q': '2011-01-01', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['result']['total'] == 1, pprint.pformat(body)

def test_full_text_search_on_json_like_string_succeeds(self):
    '''A query string that is itself valid JSON ('"{}"') must not make the
    search action fail.
    '''
    query = {'q': '"{}"', 'resource_id': self.data['resource_id']}
    payload = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}
    response = self.app.post(
        '/api/action/datastore_search',
        params=payload,
        extra_environ=headers)
    body = json.loads(response.body)
    assert body['success'], pprint.pformat(body)


class TestDatastoreSQL(tests.WsgiAppCase):
sysadmin_user = None
Expand Down Expand Up @@ -709,13 +769,16 @@ def setup_class(cls):
name='test_org',
apikey=cls.sysadmin_user.apikey)

cls.expected_records = [{u'_full_text': u"'annakarenina':1 'b':3 'moo':4 'tolstoy':2",
cls.expected_records = [{u'_full_text': [u"'annakarenina'", u"'b'",
u"'moo'", u"'tolstoy'",
u"'2005'"],
u'_id': 1,
u'author': u'tolstoy',
u'b\xfck': u'annakarenina',
u'nested': [u'b', {u'moo': u'moo'}],
u'published': u'2005-03-01T00:00:00'},
{u'_full_text': u"'b':3 'tolstoy':2 'warandpeac':1",
{u'_full_text': [u"'tolstoy'", u"'warandpeac'",
u"'b'"],
u'_id': 2,
u'author': u'tolstoy',
u'b\xfck': u'warandpeace',
Expand Down Expand Up @@ -761,7 +824,16 @@ def test_select_basic(self):
res_dict = json.loads(res.body)
assert res_dict['success'] is True
result = res_dict['result']
assert result['records'] == self.expected_records
assert len(result['records']) == len(self.expected_records)
for (row_index, row) in enumerate(result['records']):
expected_row = self.expected_records[row_index]
assert set(row.keys()) == set(expected_row.keys())
for field in row:
if field == '_full_text':
for ft_value in expected_row['_full_text']:
assert ft_value in row['_full_text']
else:
assert row[field] == expected_row[field]

# test alias search
query = 'SELECT * FROM "{0}"'.format(self.data['aliases'])
Expand All @@ -783,7 +855,16 @@ def test_select_where_like_with_percent(self):
res_dict = json.loads(res.body)
assert res_dict['success'] is True
result = res_dict['result']
assert result['records'] == self.expected_records
assert len(result['records']) == len(self.expected_records)
for (row_index, row) in enumerate(result['records']):
expected_row = self.expected_records[row_index]
assert set(row.keys()) == set(expected_row.keys())
for field in row:
if field == '_full_text':
for ft_value in expected_row['_full_text']:
assert ft_value in row['_full_text']
else:
assert row[field] == expected_row[field]

def test_self_join(self):
query = '''
Expand Down

0 comments on commit cf68e74

Please sign in to comment.