Skip to content

Commit

Permalink
[#1768] Add tests. Take advantage of dictization params where convenient. Refactor packages_field option.
Browse files Browse the repository at this point in the history
  • Loading branch information
David Read committed Jul 10, 2014
1 parent cdfefcf commit 123f715
Show file tree
Hide file tree
Showing 4 changed files with 372 additions and 96 deletions.
163 changes: 94 additions & 69 deletions ckan/lib/dictization/model_dictize.py
@@ -1,3 +1,14 @@
'''
These dictize functions generally take a domain object (such as Package) and
convert it to a dictionary, including related objects (e.g. for Package it
includes PackageTags, PackageExtras, PackageGroup etc).
The basic recipe is to call:
dictized = ckan.lib.dictization.table_dictize(domain_object)
which builds the dictionary by iterating over the table columns.
'''
import datetime
import urlparse

Expand All @@ -17,26 +28,38 @@
def group_list_dictize(obj_list, context,
                       sort_key=lambda x: x['display_name'], reverse=False,
                       with_package_counts=True,
                       include_groups=False,
                       include_tags=False,
                       include_extras=False):
    '''
    Dictize every group in obj_list with group_dictize and return the
    resulting dicts as a sorted list.

    :param obj_list: the groups to dictize. If context['with_capacity'] is
        set, each item is unpacked as a (group, capacity) pair instead.
    :param context: the dictization context. It is shallow-copied before any
        key is added, so no key is added to the caller's dict here.
    :param sort_key: key function applied to each group dict for sorting.
    :param reverse: passed to sorted() to reverse the order.
    :param with_package_counts: if true, group_dictize is asked for
        packages_field='dataset_count', otherwise for packages_field=None.
    :param include_groups: passed through to group_dictize.
    :param include_tags: passed through to group_dictize.
    :param include_extras: passed through to group_dictize.
    '''
    # Work on a shallow copy so that 'dataset_counts' does not leak into the
    # context of the caller.
    group_dictize_context = dict(context)

    # Set options to avoid any SOLR queries for each group, which would
    # slow things further.
    group_dictize_options = {
        'packages_field': 'dataset_count' if with_package_counts else None,
        # don't allow packages_field='datasets' as it is too slow
        'include_groups': include_groups,
        'include_tags': include_tags,
        'include_extras': include_extras,
        'include_users': False,  # too slow - don't allow
    }

    if with_package_counts and 'dataset_counts' not in group_dictize_context:
        # 'dataset_counts' will already be in the context in the case that
        # group_list_dictize recurses via group_dictize (groups in groups)
        group_dictize_context['dataset_counts'] = get_group_dataset_counts()

    if context.get('with_capacity'):
        group_list = [group_dictize(group, group_dictize_context,
                                    capacity=capacity,
                                    **group_dictize_options)
                      for group, capacity in obj_list]
    else:
        group_list = [group_dictize(group, group_dictize_context,
                                    **group_dictize_options)
                      for group in obj_list]

    return sorted(group_list, key=sort_key, reverse=reverse)
Expand Down Expand Up @@ -226,10 +249,8 @@ def package_dictize(pkg, context):
context['with_capacity'] = False
## no package counts as cannot fetch from search index at the same
## time as indexing to it.
group_list_dictize_context = dict(context.items()[:])
group_list_dictize_context['include_extras'] = False # for speed
result_dict["groups"] = group_list_dictize(result,
group_list_dictize_context,
## tags, extras and sub-groups are not included for speed
result_dict["groups"] = group_list_dictize(result, context,
with_package_counts=False)
#owning organization
group_rev = model.group_revision_table
Expand Down Expand Up @@ -295,27 +316,41 @@ def _get_members(context, group, member_type):
return q.all()


def get_group_dataset_counts():
    '''For all public groups, return their dataset counts, as a SOLR facet

    Runs a single PackageSearchQuery restricted to capacity:public, faceted
    on both 'groups' and 'owner_org', and returns the query's facets.
    '''
    query = search.PackageSearchQuery()
    q = {'q': '+capacity:public',
         'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
         # -1 removes the limit on the number of facet values returned
         'facet.limit': -1, 'rows': 1}
    query.run(q)
    return query.facets


def group_dictize(group, context, **kw):
def group_dictize(group, context,
include_groups=True,
include_tags=True,
include_users=True,
include_extras=True,
packages_field='datasets',
**kw):
'''
Turns a Group object and related into a dictionary. The related objects
like tags are included unless you specify it in the params.
:param packages_field: determines the format of the `packages` field - can
be `datasets`, `dataset_count`, `none_but_include_package_count` or None.
If set to `dataset_count` or `none_but_include_package_count` then you
can precalculate dataset counts in advance by supplying:
context['dataset_counts'] = get_group_dataset_counts()
'''
assert packages_field in ('datasets', 'dataset_count',
'none_but_include_package_count', None)
if packages_field in ('dataset_count', 'none_but_include_package_count'):
dataset_counts = context.get('dataset_counts', None)

result_dict = d.table_dictize(group, context)
result_dict.update(kw)

include_datasets = context.get('include_datasets', True)
include_dataset_count = context.get('include_dataset_count', True)
include_groups = context.get('include_groups', True)
include_tags = context.get('include_tags', True)
include_users = context.get('include_users', True)
include_extras = context.get('include_extras', True)

result_dict['display_name'] = group.title or group.name

if include_extras:
Expand All @@ -324,77 +359,67 @@ def group_dictize(group, context, **kw):

# NOTE(review): this is the part of group_dictize()'s body that is visible in
# this hunk; the function's header is above and its return is below the hunk.
# It fills in the packages / package_count / tags / groups / users fields of
# result_dict according to the packages_field and include_* parameters.
context['with_capacity'] = True

if packages_field:
    def get_packages_for_this_group(group_):
        '''Return (count, dataset dicts) from package_search for group_.'''
        # Ask SOLR for the list of packages for this org/group
        q = {
            'facet': 'false',
            'rows': 0,
        }

        if group_.is_organization:
            q['fq'] = 'owner_org:"{0}"'.format(group_.id)
        else:
            q['fq'] = 'groups:"{0}"'.format(group_.name)

        is_group_member = (
            context.get('user') and
            new_authz.has_user_permission_for_group_or_org(
                group_.id, context.get('user'), 'read'))
        if is_group_member:
            context['ignore_capacity_check'] = True

        if not context.get('for_view'):
            q['rows'] = 1000  # Only the first 1000 datasets are returned

        # 'schema' is dropped from the context handed to package_search
        search_context = dict((k, v) for (k, v) in context.items()
                              if k != 'schema')
        search_results = logic.get_action('package_search')(search_context,
                                                            q)
        return search_results['count'], search_results['results']

    if packages_field == 'datasets':
        package_count, packages = get_packages_for_this_group(group)
        result_dict['packages'] = packages
    else:
        # i.e. packages_field is 'dataset_count' or
        # 'none_but_include_package_count'
        if dataset_counts is None:
            package_count, packages = get_packages_for_this_group(group)
        else:
            # Use the pre-calculated package_counts passed in.
            facets = dataset_counts
            if group.is_organization:
                package_count = facets['owner_org'].get(group.id, 0)
            else:
                package_count = facets['groups'].get(group.name, 0)
        if packages_field != 'none_but_include_package_count':
            result_dict['packages'] = package_count

    result_dict['package_count'] = package_count

if include_tags:
    # group tags are not creatable via the API yet, but that was(/is) a
    # future intention (see kindly's commit 5c8df894 on 2011/12/23)
    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

if include_groups:
    # these sub-groups won't have tags or extras for speed
    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context, include_groups=True)

# (a leftover "import pdb; pdb.set_trace()" debugger breakpoint was removed
# from here - it would have halted every call at an interactive prompt)
if include_users:
    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)

context['with_capacity'] = False

Expand Down
33 changes: 17 additions & 16 deletions ckan/logic/action/get.py
Expand Up @@ -388,10 +388,11 @@ def _group_or_org_list(context, data_dict, is_org=False):
total=1)

all_fields = data_dict.get('all_fields', None)
include_extras = asbool(data_dict.get('include_extras', False))
include_extras = all_fields and \
asbool(data_dict.get('include_extras', False))

query = model.Session.query(model.Group).join(model.GroupRevision)
if all_fields and include_extras:
if include_extras:
# this does an eager load of the extras, avoiding an sql query every
# time group_list_dictize accesses a group's extra.
query = query.options(sqlalchemy.orm.joinedload(model.Group._extras))
Expand All @@ -410,23 +411,21 @@ def _group_or_org_list(context, data_dict, is_org=False):
query = query.filter(model.GroupRevision.is_organization == is_org)

groups = query.all()
group_list_context = dict(context.items()[:])
if not all_fields:
group_list_context.update((
('include_tags', False),
('include_extras', False),
))
if all_fields:
include_tags = asbool(data_dict.get('include_tags', False))
else:
group_list_context.update((
('include_tags', asbool(data_dict.get('include_tags', False))),
('include_extras', include_extras),
))
include_tags = False
# even if we are not going to return all_fields, we need to dictize all the
# groups so that we can sort by any field.
group_list = model_dictize.group_list_dictize(
groups, group_list_context,
groups, context,
sort_key=lambda x: x[sort_info[0][0]],
reverse=sort_info[0][1] == 'desc',
with_package_counts=all_fields or sort_info[0][0] == 'packages',
include_groups=asbool(data_dict.get('include_groups', False)))
include_groups=asbool(data_dict.get('include_groups', False)),
include_tags=include_tags,
include_extras=include_extras,
)

if not all_fields:
group_list = [group[ref_group_by] for group in group_list]
Expand Down Expand Up @@ -1059,7 +1058,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
include_datasets = data_dict.get('include_datasets', True)
if isinstance(include_datasets, basestring):
include_datasets = (include_datasets.lower() in ('true', '1'))
context['include_datasets'] = include_datasets
packages_field = 'datasets' if include_datasets \
else 'none_but_include_package_count'

if group is None:
raise NotFound
Expand All @@ -1073,7 +1073,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
else:
_check_access('group_show', context, data_dict)

group_dict = model_dictize.group_dictize(group, context)
group_dict = model_dictize.group_dictize(group, context,
packages_field=packages_field)

if is_org:
plugin_type = plugins.IOrganizationController
Expand Down

0 comments on commit 123f715

Please sign in to comment.