Skip to content

Commit

Permalink
[#4462] datastore_search: exclude rank columns if not in fields
Browse files Browse the repository at this point in the history
  • Loading branch information
wardi committed Sep 23, 2018
1 parent 04f9c7d commit 73a4aa5
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 50 deletions.
102 changes: 56 additions & 46 deletions ckanext/datastore/backend/postgres.py
Expand Up @@ -177,6 +177,7 @@ def _result_fields(fields_types, field_info, fields, q):
passed and query passed.
:param fields_types: OrderedDict returned from _get_fields_types(..)
with rank column types added by plugins' datastore_search
:param field_info: dict returned from _get_field_info(..)
:param fields: list of field names passed to datastore_search
or None for all
Expand All @@ -190,7 +191,7 @@ def _result_fields(fields_types, field_info, fields, q):
for field_id in fields:
f = {u'id': field_id, u'type': fields_types[field_id]}
if field_id in field_info:
f['info'] = field_info[f['id']]
f[u'info'] = field_info[f['id']]
result_fields.append(f)
return result_fields

Expand Down Expand Up @@ -391,41 +392,49 @@ def _where_clauses(data_dict, fields_types):
clause_str = u'_full_text @@ {0}'.format(query_field)
clauses.append((clause_str,))

clause_str = (u'to_tsvector({0}, cast("{1}" as text)) '
u'@@ {2}').format(
literal_string(lang),
field, query_field)
clause_str = (
u'to_tsvector({0}, cast({1} as text)) @@ {2}').format(
literal_string(lang),
identifier(field),
query_field)
clauses.append((clause_str,))

return clauses


def _textsearch_query(lang, q, plain):
u'''
:param lang: language for to_tsvector
:param q: string to search _full_text or dict to search columns
:param plain: True to use plainto_tsquery, False for to_tsquery
return (query, rank_columns) based on passed text/dict query
rank_columns is a {alias: statement} dict where alias is "rank" for
_full_text queries, and "rank <column-name>" for column search
'''
if not q:
return '', ''
return '', {}

statements = []
rank_columns = []
rank_columns = {}
if isinstance(q, string_types):
query, rank = _build_query_and_rank_statements(
lang, q, plain)
statements.append(query)
rank_columns.append(rank)
rank_columns[u'rank'] = rank
elif isinstance(q, dict):
for field, value in q.iteritems():
query, rank = _build_query_and_rank_statements(
lang, value, plain, field)
statements.append(query)
rank_columns.append(rank)
rank_columns[u'rank ' + field] = rank

statements_str = ', ' + ', '.join(statements)
rank_columns_str = ', '.join(rank_columns)
return statements_str, rank_columns_str
return statements_str, rank_columns


def _build_query_and_rank_statements(lang, query, plain, field=None):
query_alias = _ts_query_alias(field)
rank_alias = _ts_rank_alias(field)
lang_literal = literal_string(lang)
query_literal = literal_string(query)
if plain:
Expand All @@ -436,14 +445,12 @@ def _build_query_and_rank_statements(lang, query, plain, field=None):
lang_literal=lang_literal,
literal=query_literal, alias=query_alias)
if field is None:
rank_field = '_full_text'
rank_field = u'_full_text'
else:
rank_field = u'to_tsvector({lang_literal}, cast("{field}" as text))'
rank_field = rank_field.format(lang_literal=lang_literal, field=field)
rank_statement = u'ts_rank({rank_field}, {query_alias}, 32) AS {alias}'
rank_statement = rank_statement.format(rank_field=rank_field,
query_alias=query_alias,
alias=rank_alias)
rank_field = u'to_tsvector({0}, cast({1} as text))'.format(
lang_literal, identifier(field))
rank_statement = u'ts_rank({0}, {1}, 32)'.format(
rank_field, query_alias)
return statement, rank_statement


Expand All @@ -454,25 +461,18 @@ def _fts_lang(lang=None):
return lang or default_fts_lang


def _ts_rank_alias(field=None):
rank_alias = u'rank'
if field:
rank_alias += u' ' + field
return u'"{0}"'.format(rank_alias)

def _sort(sort, fields_types, rank_columns):
u'''
:param sort: string or list sort parameter passed to datastore_search,
use None if not given
:param fields_types: OrderedDict returned from _get_fields_types(..)
:param rank_columns: rank_columns returned from _textsearch_query(..)
def _sort(data_dict, fields_types):
sort = data_dict.get('sort')
returns sort expression as a string. When sort is None use rank_columns
to order by best text search match
'''
if not sort:
q = data_dict.get('q')
if q:
if isinstance(q, string_types):
return [_ts_rank_alias()]
elif isinstance(q, dict):
return [_ts_rank_alias(field) for field in q
if field not in fields_types]
else:
return []
return rank_columns.values()

clauses = datastore_helpers.get_list(sort, False)

Expand All @@ -490,7 +490,7 @@ def _ts_query_alias(field=None):
query_alias = u'query'
if field:
query_alias += u' ' + field
return u'"{0}"'.format(query_alias)
return identifier(query_alias)


def _get_aliases(context, data_dict):
Expand Down Expand Up @@ -1691,19 +1691,26 @@ def datastore_search(self, context, data_dict, fields_types, query_dict):

fields = data_dict.get('fields')

ts_query, rank_columns = _textsearch_query(
_fts_lang(data_dict.get('lang')),
data_dict.get('q'),
data_dict.get('plain', True))
# mutate parameter to add rank columns for _result_fields
for rank_alias in rank_columns:
fields_types[rank_alias] = u'float'

if fields:
field_ids = datastore_helpers.get_list(fields)
else:
field_ids = fields_types.keys()

ts_query, rank_column = _textsearch_query(
_fts_lang(data_dict.get('lang')),
data_dict.get('q'),
data_dict.get('plain', True))
limit = data_dict.get('limit', 100)
offset = data_dict.get('offset', 0)

sort = _sort(data_dict, fields_types)
sort = _sort(
data_dict.get('sort'),
fields_types,
rank_columns)
where = _where_clauses(data_dict, fields_types)

select_cols = []
Expand All @@ -1719,12 +1726,15 @@ def datastore_search(self, context, data_dict, fields_types, query_dict):
fmt = u"to_json({0})".format(fmt)
elif typ.startswith(u'_') or typ.endswith(u'[]'):
fmt = u'array_to_json({0})'

if field_id in rank_columns:
select_cols.append((fmt + ' as {1}').format(
rank_columns[field_id], identifier(field_id)))
continue

if records_format == u'objects':
fmt += u' as {0}'
select_cols.append(fmt.format(
identifier(field_id)))
if rank_column:
select_cols.append(rank_column)
select_cols.append(fmt.format(identifier(field_id)))

query_dict['distinct'] = data_dict.get('distinct', False)
query_dict['select'] += select_cols
Expand Down
9 changes: 6 additions & 3 deletions ckanext/datastore/plugin.py
Expand Up @@ -168,9 +168,6 @@ def after_delete(self, context, resources):

def datastore_validate(self, context, data_dict, fields_types):
column_names = fields_types.keys()
fields = data_dict.get('fields')
if fields:
data_dict['fields'] = list(set(fields) - set(column_names))

filters = data_dict.get('filters', {})
for key in filters.keys():
Expand All @@ -181,12 +178,18 @@ def datastore_validate(self, context, data_dict, fields_types):
if q:
if isinstance(q, string_types):
del data_dict['q']
column_names.append(u'rank')
elif isinstance(q, dict):
for key in q.keys():
if key in fields_types and isinstance(q[key],
string_types):
column_names.append(u'rank ' + key)
del q[key]

fields = data_dict.get('fields')
if fields:
data_dict['fields'] = list(set(fields) - set(column_names))

language = data_dict.get('language')
if language:
if isinstance(language, string_types):
Expand Down
17 changes: 16 additions & 1 deletion ckanext/datastore/tests/test_search.py
Expand Up @@ -40,7 +40,7 @@ def test_fts_on_field_calculates_ranks_only_on_that_specific_field(self):
result = helpers.call_action('datastore_create', **data)
search_data = {
'resource_id': resource['id'],
'fields': 'from',
'fields': 'from, rank from',
'q': {
'from': 'Brazil'
},
Expand Down Expand Up @@ -1217,3 +1217,18 @@ def test_fields_results_csv(self):
u'2020-01-02T00:00:00,9,aaab\n'
u'2020-01-01T00:00:00,9,aaac\n'
)
r = helpers.call_action(
'datastore_search',
resource_id=r['resource_id'],
records_format=u'csv',
fields=u'dt, num, txt',
q=u'aaac',
)
assert_equals(r['fields'], [
{u'id': u'dt', u'type': u'timestamp'},
{u'id': u'num', u'type': u'numeric'},
{u'id': u'txt', u'type': u'text'}])
assert_equals(
r['records'],
u'2020-01-01T00:00:00,9,aaac\n'
)

0 comments on commit 73a4aa5

Please sign in to comment.