Skip to content

Commit

Permalink
[#2204] Add filters for more common view types
Browse files Browse the repository at this point in the history
If the view types that need to be checked are some of the core ones we
can filter the dataset search to only return the relevant ones, which
will reduce the number of datasets to be checked.

Refactored the text view plugin to allow importing the formats used.
  • Loading branch information
amercader committed Feb 17, 2015
1 parent 9ee6590 commit 7d2836f
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 39 deletions.
124 changes: 95 additions & 29 deletions ckan/lib/cli.py
Expand Up @@ -6,6 +6,7 @@
import sys
from pprint import pprint
import re
import itertools
import logging
import ckan.logic as logic
import ckan.model as model
Expand Down Expand Up @@ -2252,48 +2253,105 @@ def _get_view_plugins(self, view_plugin_types,

return loaded_view_plugins

def create_views_search(self, view_plugin_types=[]):
def _add_default_filters(self, search_data_dict, view_types):
'''
Adds extra filters to the `package_search` dict for common view types
from ckan.lib.datapreview import add_views_to_dataset_resources
It basically adds `fq` parameters that filter relevant resource formats
for the view types provided. For instance, if one of the view types is
`pdf_view` the following will be added to the final query:
log = logging.getLogger(__name__)
fq=res_format:"pdf" OR res_format:"PDF"
datastore_enabled = 'datastore' in config['ckan.plugins'].split()
This obviously should only be used if all view types are known and can
be filtered, otherwise we want all datasets to be returned. If a
non-filterable view type is provided, the search params are not
modified.
loaded_view_plugins = self._get_view_plugins(view_plugin_types,
datastore_enabled)
Returns the provided data_dict for `package_search`, optionally
modified with extra filters.
'''

def _search_datasets(page=1):
from ckanext.imageview.plugin import DEFAULT_IMAGE_FORMATS
from ckanext.textview.plugin import get_formats as get_text_formats
from ckanext.datapusher.plugin import DEFAULT_FORMATS as \
datapusher_formats

n = 100
filter_formats = []

search_data_dict = {
'q': '*:*',
'fq': 'dataset_type:dataset',
'rows': n,
'start': n * (page - 1),
}
for view_type in view_types:
if view_type == 'image_view':

query = p.toolkit.get_action('package_search')({},
search_data_dict)
for _format in DEFAULT_IMAGE_FORMATS:
filter_formats.extend([_format, _format.upper()])

return query
elif view_type == 'text_view':
formats = get_text_formats(config)
for _format in itertools.chain.from_iterable(formats.values()):
filter_formats.extend([_format, _format.upper()])

context = {'user': self.site_user['name']}
elif view_type == 'pdf_view':
filter_formats.extend(['pdf', 'PDF'])

elif view_type in ['recline_view', 'recline_grid_view',
'recline_graph_view', 'recline_map_view']:

if datapusher_formats[0] in filter_formats:
continue

for _format in datapusher_formats:
if '/' not in _format:
filter_formats.extend([_format, _format.upper()])
else:
# There is another view type provided so we can't add any
# filter
return search_data_dict

filter_formats_query = ['+res_format:"{0}"'.format(_format)
for _format in filter_formats]
search_data_dict['fq_list'].append(' OR '.join(filter_formats_query))

return search_data_dict

def _search_datasets(self, page=1, view_types=[]):
'''
Perform a query with `package_search` and return the result
def _add_views_to_dataset(dataset_dict, view_types):
Results can be paginated using the `page` parameter
'''

n = 100

search_data_dict = {
'q': '*:*',
'fq': 'dataset_type:dataset',
'fq_list': [],
'rows': n,
'start': n * (page - 1),
}

self._add_default_filters(search_data_dict, view_types)

query = p.toolkit.get_action('package_search')({},
search_data_dict)
return query

if not dataset.get('resources'):
return []
def create_views_search(self, view_plugin_types=[]):

views = add_views_to_dataset_resources(context,
dataset_dict,
view_types=view_types)
return views
from ckan.lib.datapreview import add_views_to_dataset_resources

log = logging.getLogger(__name__)

datastore_enabled = 'datastore' in config['ckan.plugins'].split()

loaded_view_plugins = self._get_view_plugins(view_plugin_types,
datastore_enabled)

context = {'user': self.site_user['name']}

page = 1
while True:
query = _search_datasets(page)
query = self._search_datasets(page, loaded_view_plugins)

if page == 1 and query['count'] == 0:
log.info('No datasets to create resource views on, exiting...')
Expand All @@ -2313,16 +2371,24 @@ def _add_views_to_dataset(dataset_dict, view_types):
sys.exit(1)

if query['results']:
for dataset in query['results']:
views = _add_views_to_dataset(dataset, loaded_view_plugins)
for dataset_dict in query['results']:

if not dataset_dict.get('resources'):
continue

views = add_views_to_dataset_resources(
context,
dataset_dict,
view_types=loaded_view_plugins)

if views:
view_types = list(set([view['view_type']
for view in views]))
msg = ('Added {0} view(s) of type(s) {1} to ' +
'resources from dataset {2}')
log.debug(msg.format(len(views),
', '.join(view_types),
dataset['name']))
dataset_dict['name']))
page += 1
else:
break
Expand Down
32 changes: 22 additions & 10 deletions ckanext/textview/plugin.py
Expand Up @@ -13,6 +13,25 @@
DEFAULT_JSONP_FORMATS = ['jsonp']


def get_formats(config):
    '''Return the formats handled by the text view, grouped by kind.

    Each ``ckan.preview.*_formats`` option is read from ``config`` as a
    whitespace-separated string. When an option is missing or empty, the
    corresponding ``DEFAULT_*_FORMATS`` value is used instead.

    Returns a dict with the keys ``text_formats``, ``xml_formats``,
    ``json_formats`` and ``jsonp_formats``.
    '''

    def _configured(option):
        # An unset option yields '' and therefore an empty (falsy) list,
        # which lets the caller fall back to the default with ``or``.
        return config.get(option, '').split()

    return {
        'text_formats': (_configured('ckan.preview.text_formats') or
                         DEFAULT_TEXT_FORMATS),
        'xml_formats': (_configured('ckan.preview.xml_formats') or
                        DEFAULT_XML_FORMATS),
        'json_formats': (_configured('ckan.preview.json_formats') or
                         DEFAULT_JSON_FORMATS),
        'jsonp_formats': (_configured('ckan.preview.jsonp_formats') or
                          DEFAULT_JSONP_FORMATS),
    }


class TextView(p.SingletonPlugin):
'''This extension previews JSON(P).'''

Expand All @@ -28,17 +47,10 @@ class TextView(p.SingletonPlugin):
no_jsonp_formats = []

def update_config(self, config):
text_formats = config.get('ckan.preview.text_formats', '').split()
self.text_formats = text_formats or DEFAULT_TEXT_FORMATS

xml_formats = config.get('ckan.preview.xml_formats', '').split()
self.xml_formats = xml_formats or DEFAULT_XML_FORMATS

json_formats = config.get('ckan.preview.json_formats', '').split()
self.json_formats = json_formats or DEFAULT_JSON_FORMATS

jsonp_formats = config.get('ckan.preview.jsonp_formats', '').split()
self.jsonp_formats = jsonp_formats or DEFAULT_JSONP_FORMATS
formats = get_formats(config)
for key, value in formats.iteritems():
setattr(self, key, value)

self.no_jsonp_formats = (self.text_formats +
self.xml_formats +
Expand Down

0 comments on commit 7d2836f

Please sign in to comment.