Skip to content

Commit

Permalink
Merge branch 'master' into 1792-filterable-resource-views
Browse files Browse the repository at this point in the history
  • Loading branch information
vitorbaptista committed Aug 4, 2014
2 parents 6ef5aca + d9e6240 commit b673f48
Show file tree
Hide file tree
Showing 32 changed files with 1,068 additions and 632 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Expand Up @@ -49,6 +49,7 @@ RUN mkdir /var/cache/nginx
ADD ./contrib/docker/main.cf /etc/postfix/main.cf

# Configure runit
ADD ./contrib/docker/my_init.d /etc/my_init.d
ADD ./contrib/docker/svc /etc/service
CMD ["/sbin/my_init"]

Expand Down
235 changes: 139 additions & 96 deletions ckan/lib/dictization/model_dictize.py
@@ -1,3 +1,14 @@
'''
These dictize functions generally take a domain object (such as Package) and
convert it to a dictionary, including related objects (e.g. for Package it
includes PackageTags, PackageExtras, PackageGroup etc).
The basic recipe is to call:
dictized = ckan.lib.dictization.table_dictize(domain_object)
which builds the dictionary by iterating over the table columns.
'''
import datetime
import urlparse

Expand All @@ -15,62 +26,37 @@
## package save

def group_list_dictize(obj_list, context,
sort_key=lambda x:x['display_name'], reverse=False,
with_package_counts=True):

active = context.get('active', True)
with_private = context.get('include_private_packages', False)

if with_package_counts:
query = search.PackageSearchQuery()
q = {'q': '+capacity:public' if not with_private else '*:*',
'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
'facet.limit': -1, 'rows': 1}
query.run(q)

result_list = []

for obj in obj_list:
if context.get('with_capacity'):
obj, capacity = obj
group_dict = d.table_dictize(obj, context, capacity=capacity)
else:
group_dict = d.table_dictize(obj, context)
group_dict.pop('created')
if active and obj.state not in ('active', 'pending'):
continue

group_dict['display_name'] = (group_dict.get('title') or
group_dict.get('name'))

image_url = group_dict.get('image_url')
group_dict['image_display_url'] = image_url
if image_url and not image_url.startswith('http'):
#munge here should not have an effect only doing it incase
#of potential vulnerability of dodgy api input
image_url = munge.munge_filename(image_url)
group_dict['image_display_url'] = h.url_for_static(
'uploads/group/%s' % group_dict.get('image_url'),
qualified=True
)

if with_package_counts:
facets = query.facets
if obj.is_organization:
group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
else:
group_dict['packages'] = facets['groups'].get(obj.name, 0)

if context.get('for_view'):
if group_dict['is_organization']:
plugin = plugins.IOrganizationController
else:
plugin = plugins.IGroupController
for item in plugins.PluginImplementations(plugin):
group_dict = item.before_view(group_dict)
sort_key=lambda x: x['display_name'], reverse=False,
with_package_counts=True,
include_groups=False,
include_tags=False,
include_extras=False):

group_dictize_context = dict(context.items()[:])
# Set options to avoid any SOLR queries for each group, which would
# slow things further.
group_dictize_options = {
'packages_field': 'dataset_count' if with_package_counts else None,
# don't allow packages_field='datasets' as it is too slow
'include_groups': include_groups,
'include_tags': include_tags,
'include_extras': include_extras,
'include_users': False, # too slow - don't allow
}
if with_package_counts and 'dataset_counts' not in group_dictize_context:
# 'dataset_counts' will already be in the context in the case that
# group_list_dictize recurses via group_dictize (groups in groups)
group_dictize_context['dataset_counts'] = get_group_dataset_counts()
if context.get('with_capacity'):
group_list = [group_dictize(group, group_dictize_context,
capacity=capacity, **group_dictize_options)
for group, capacity in obj_list]
else:
group_list = [group_dictize(group, group_dictize_context,
**group_dictize_options)
for group in obj_list]

result_list.append(group_dict)
return sorted(result_list, key=sort_key, reverse=reverse)
return sorted(group_list, key=sort_key, reverse=reverse)

def resource_list_dictize(res_list, context):

Expand Down Expand Up @@ -257,6 +243,7 @@ def package_dictize(pkg, context):
context['with_capacity'] = False
## no package counts as cannot fetch from search index at the same
## time as indexing to it.
## tags, extras and sub-groups are not included for speed
result_dict["groups"] = group_list_dictize(result, context,
with_package_counts=False)
#owning organization
Expand Down Expand Up @@ -323,54 +310,110 @@ def _get_members(context, group, member_type):
return q.all()


def group_dictize(group, context):
result_dict = d.table_dictize(group, context)

result_dict['display_name'] = group.display_name

result_dict['extras'] = extras_dict_dictize(
group._extras, context)

include_datasets = context.get('include_datasets', True)

q = {
'facet': 'false',
'rows': 0,
}

if group.is_organization:
q['fq'] = 'owner_org:"{0}"'.format(group.id)
else:
q['fq'] = 'groups:"{0}"'.format(group.name)

is_group_member = (context.get('user') and
new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
if is_group_member:
context['ignore_capacity_check'] = True

if include_datasets:
q['rows'] = 1000 # Only the first 1000 datasets are returned
def get_group_dataset_counts():
    '''For all public groups, return their dataset counts, as a SOLR facet'''
    # One cheap SOLR round-trip: we only want the facet counts, so ask for a
    # single row and facet on both group and organization fields.
    solr_params = {
        'q': '+capacity:public',
        'fl': 'groups',
        'facet.field': ['groups', 'owner_org'],
        'facet.limit': -1,
        'rows': 1,
    }
    package_query = search.PackageSearchQuery()
    package_query.run(solr_params)
    return package_query.facets


def group_dictize(group, context,
include_groups=True,
include_tags=True,
include_users=True,
include_extras=True,
packages_field='datasets',
**kw):
'''
Turns a Group object and related into a dictionary. The related objects
like tags are included unless you specify it in the params.
:param packages_field: determines the format of the `packages` field - can
be `datasets`, `dataset_count`, `none_but_include_package_count` or None.
If set to `dataset_count` or `none_but_include_package_count` then you
can precalculate dataset counts in advance by supplying:
context['dataset_counts'] = get_group_dataset_counts()
'''
assert packages_field in ('datasets', 'dataset_count',
'none_but_include_package_count', None)
if packages_field in ('dataset_count', 'none_but_include_package_count'):
dataset_counts = context.get('dataset_counts', None)

context_ = dict((k, v) for (k, v) in context.items() if k != 'schema')
search_results = logic.get_action('package_search')(context_, q)
result_dict = d.table_dictize(group, context)
result_dict.update(kw)

if include_datasets:
result_dict['packages'] = search_results['results']
result_dict['display_name'] = group.title or group.name

result_dict['package_count'] = search_results['count']
if include_extras:
result_dict['extras'] = extras_dict_dictize(
group._extras, context)

context['with_capacity'] = True
result_dict['tags'] = tag_list_dictize(
_get_members(context, group, 'tags'),
context)

result_dict['groups'] = group_list_dictize(
_get_members(context, group, 'groups'),
context)
if packages_field:
def get_packages_for_this_group(group_):
# Ask SOLR for the list of packages for this org/group
q = {
'facet': 'false',
'rows': 0,
}

result_dict['users'] = user_list_dictize(
_get_members(context, group, 'users'),
context)
if group_.is_organization:
q['fq'] = 'owner_org:"{0}"'.format(group_.id)
else:
q['fq'] = 'groups:"{0}"'.format(group_.name)

is_group_member = (context.get('user') and
new_authz.has_user_permission_for_group_or_org(group_.id, context.get('user'), 'read'))
if is_group_member:
context['ignore_capacity_check'] = True

if not context.get('for_view'):
q['rows'] = 1000 # Only the first 1000 datasets are returned

search_context = dict((k, v) for (k, v) in context.items() if k != 'schema')
search_results = logic.get_action('package_search')(search_context, q)
return search_results['count'], search_results['results']
if packages_field == 'datasets':
package_count, packages = get_packages_for_this_group(group)
result_dict['packages'] = packages
else:
# i.e. packages_field is 'dataset_count' or
# 'none_but_include_package_count'
if dataset_counts is None:
package_count, packages = get_packages_for_this_group(group)
else:
# Use the pre-calculated package_counts passed in.
facets = dataset_counts
if group.is_organization:
package_count = facets['owner_org'].get(group.id, 0)
else:
package_count = facets['groups'].get(group.name, 0)
if packages_field != 'none_but_include_package_count':
result_dict['packages'] = package_count

result_dict['package_count'] = package_count

if include_tags:
# group tags are not creatable via the API yet, but that was(/is) a
# future intention (see kindly's commit 5c8df894 on 2011/12/23)
result_dict['tags'] = tag_list_dictize(
_get_members(context, group, 'tags'),
context)

if include_groups:
# these sub-groups won't have tags or extras for speed
result_dict['groups'] = group_list_dictize(
_get_members(context, group, 'groups'),
context, include_groups=True)

if include_users:
result_dict['users'] = user_list_dictize(
_get_members(context, group, 'users'),
context)

context['with_capacity'] = False

Expand Down
10 changes: 9 additions & 1 deletion ckan/lib/search/index.py
Expand Up @@ -3,6 +3,7 @@
import logging
import collections
import json
import datetime
from dateutil.parser import parse

import re
Expand Down Expand Up @@ -219,11 +220,18 @@ def index_package(self, pkg_dict, defer_commit=False):
# be needed? For my data not changing the keys seems to not cause a
# problem.
new_dict = {}
bogus_date = datetime.datetime(1, 1, 1)
for key, value in pkg_dict.items():
key = key.encode('ascii', 'ignore')
if key.endswith('_date'):
try:
value = parse(value).isoformat() + 'Z'
date = parse(value, default=bogus_date)
if date != bogus_date:
value = date.isoformat() + 'Z'
else:
# The date field was empty, so dateutil filled it with
# the default bogus date
value = None
except ValueError:
continue
new_dict[key] = value
Expand Down
2 changes: 1 addition & 1 deletion ckan/lib/search/query.py
Expand Up @@ -264,7 +264,7 @@ def get_all_entity_ids(self, max_results=1000):
def get_index(self,reference):
query = {
'rows': 1,
'q': 'name:%s OR id:%s' % (reference,reference),
'q': 'name:"%s" OR id:"%s"' % (reference,reference),
'wt': 'json',
'fq': 'site_id:"%s"' % config.get('ckan.site_id')}

Expand Down
4 changes: 2 additions & 2 deletions ckan/lib/uploader.py
Expand Up @@ -27,8 +27,8 @@ def get_storage_path():
if storage_path:
_storage_path = storage_path
elif ofs_impl == 'pairtree' and ofs_storage_dir:
log.warn('''Please use config option ckan.storage_path instaed of
ofs.storage_path''')
log.warn('''Please use config option ckan.storage_path instead of
ofs.storage_dir''')
_storage_path = ofs_storage_dir
return _storage_path
elif ofs_impl:
Expand Down
45 changes: 45 additions & 0 deletions ckan/lib/util.py
@@ -0,0 +1,45 @@
'''Shared utility functions for any Python code to use.
Unlike :py:mod:`ckan.lib.helpers`, the functions in this module are not
available to templates.
'''
import subprocess


# We implement our own check_output() function because
# subprocess.check_output() isn't in Python 2.6.
# This code is copy-pasted from Python 2.7 and adapted to make it work with
# Python 2.6.
# http://hg.python.org/cpython/file/d37f963394aa/Lib/subprocess.py#l544
def check_output(*popenargs, **kwargs):
    r"""Run command with arguments and return its output as a byte string.

    If the exit code was non-zero it raises a CalledProcessError. The
    CalledProcessError object will have the return code in the returncode
    attribute and output in the output attribute.

    The arguments are the same as for the Popen constructor. Example:

    >>> check_output(["ls", "-l", "/dev/null"])
    'crw-rw-rw- 1 root root 1, 3 Oct 18  2007 /dev/null\n'

    The stdout argument is not allowed as it is used internally.
    To capture standard error in the result, use stderr=STDOUT.

    >>> check_output(["/bin/sh", "-c",
    ...               "ls -l non_existent_file ; exit 0"],
    ...              stderr=STDOUT)
    'ls: non_existent_file: No such file or directory\n'

    :raises ValueError: if a ``stdout`` argument is passed (it is reserved
        for internal use)
    :raises subprocess.CalledProcessError: if the command exits non-zero;
        the captured stdout is available on the exception's ``output``
        attribute
    """
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
    output, unused_err = process.communicate()
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        # Python 2.6's CalledProcessError.__init__ does not accept an
        # ``output`` argument (unlike 2.7's), so attach the captured output
        # as an attribute afterwards to honour the contract documented above.
        error = subprocess.CalledProcessError(retcode, cmd)
        error.output = output
        raise error
    return output

0 comments on commit b673f48

Please sign in to comment.