Skip to content

Commit

Permalink
[#1815] Add "distinct" param to datastore_search
Browse files Browse the repository at this point in the history
This allows users to get only the distinct rows of the query. The "total"
field in the returned data counts all rows that match the query, not only the distinct ones.
For example, in our test we have the data:

```json
[
    {"author": "tolstoy", "title": "Anna Karenina"},
    {"author": "tolstoy", "title": "War and Peace"}
]
```

We want to get a list with all authors on the datastore resource. Then we query
the "datastore_search" with:

```json
{
    "resource_id": "the_resource_id",
    "fields": ["author"],
    "distinct": true
}
```

The result is:

```json
{
    "success": true,
    "total": 2,
    "records": [{"author": "tolstoy"}]
}
```

Note that even though there's only one returned record, the "total" field is 2. This is because "total" doesn't take into account the distinct values, but all rows that match our query.
  • Loading branch information
vitorbaptista committed Jul 3, 2014
1 parent 41f67a0 commit 6071824
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 1 deletion.
8 changes: 7 additions & 1 deletion ckanext/datastore/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,14 +868,20 @@ def search_data(context, data_dict):
limit = query_dict['limit']
offset = query_dict['offset']

if query_dict.get('distinct'):
distinct = 'DISTINCT'
else:
distinct = ''

if sort:
sort_clause = 'ORDER BY %s' % ', '.join(sort)
else:
sort_clause = ''

sql_string = u'''SELECT {select}
sql_string = u'''SELECT {distinct} {select}
FROM "{resource}" {ts_query}
{where} {sort} LIMIT {limit} OFFSET {offset}'''.format(
distinct=distinct,
select=select_columns,
resource=data_dict['resource_id'],
ts_query=ts_query,
Expand Down
2 changes: 2 additions & 0 deletions ckanext/datastore/logic/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ def datastore_search(context, data_dict):
:type filters: dictionary
:param q: full text query (optional)
:type q: string
:param distinct: return only distinct rows (optional, default: false)
:type distinct: bool
:param plain: treat as plain text query (optional, default: true)
:type plain: bool
:param language: language of the full text query (optional, default: english)
Expand Down
1 change: 1 addition & 0 deletions ckanext/datastore/logic/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def datastore_search_schema():
'offset': [ignore_missing, int_validator],
'fields': [ignore_missing, list_of_strings_or_string],
'sort': [ignore_missing, list_of_strings_or_string],
'distinct': [ignore_missing, boolean_validator],
'__junk': [empty],
'__before': [rename('id', 'resource_id')]
}
Expand Down
6 changes: 6 additions & 0 deletions ckanext/datastore/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@ def datastore_validate(self, context, data_dict, column_names):
if isinstance(plain, bool):
del data_dict['plain']

distinct = data_dict.get('distinct')
if distinct:
if isinstance(distinct, bool):
del data_dict['distinct']

sort_clauses = data_dict.get('sort')
if sort_clauses:
invalid_clauses = [c for c in sort_clauses
Expand Down Expand Up @@ -361,6 +366,7 @@ def datastore_search(self, context, data_dict, column_names, query_dict):
select_cols = [u'"{0}"'.format(field_id) for field_id in field_ids] +\
[u'count(*) over() as "_full_count" %s' % rank_column]

query_dict['distinct'] = data_dict.get('distinct', False)
query_dict['select'] += select_cols
query_dict['ts_query'] = ts_query
query_dict['sort'] += sort
Expand Down
14 changes: 14 additions & 0 deletions ckanext/datastore/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,20 @@ def test_search_fields(self):
assert result['records'] == [{u'b\xfck': 'annakarenina', 'author': 'tolstoy'},
{u'b\xfck': 'warandpeace', 'author': 'tolstoy'}], result['records']

def test_search_distinct(self):
    """Check that ``distinct`` collapses duplicate rows in the search result.

    Posts a ``datastore_search`` request that selects only the ``author``
    field with ``distinct`` enabled, then asserts that a single record is
    returned while ``total`` is still 2.
    """
    query = {'resource_id': self.data['resource_id'],
             'fields': [u'author'],
             'distinct': True}
    # The action API is called with the JSON document as the form key.
    encoded_query = '%s=1' % json.dumps(query)
    headers = {'Authorization': str(self.normal_user.apikey)}

    response = self.app.post('/api/action/datastore_search',
                             params=encoded_query,
                             extra_environ=headers)
    payload = json.loads(response.body)
    assert payload['success'] is True

    search_result = payload['result']
    # "total" is expected to stay at 2 even though only one distinct
    # record comes back.
    assert search_result['total'] == 2
    expected_records = [{u'author': 'tolstoy'}]
    assert search_result['records'] == expected_records, search_result['records']

def test_search_filters(self):
data = {'resource_id': self.data['resource_id'],
'filters': {u'b\xfck': 'annakarenina'}}
Expand Down

0 comments on commit 6071824

Please sign in to comment.