[#1768] Add tests. Take advantage of dictization params where conveni…

…ent. Refactor packages_field option.
ckan · Jul 10, 2014 · 123f715 · 123f715
1 parent cdfefcf
commit 123f715
Show file tree

Hide file tree

Showing 4 changed files with 372 additions and 96 deletions.
diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py
@@ -1,3 +1,14 @@
+'''
+These dictize functions generally take a domain object (such as Package) and
+convert it to a dictionary, including related objects (e.g. for Package it
+includes PackageTags, PackageExtras, PackageGroup etc).
+
+The basic recipe is to call:
+
+    dictized = ckan.lib.dictization.table_dictize(domain_object)
+
+which builds the dictionary by iterating over the table columns.
+'''
 import datetime
 import urlparse
 
@@ -17,26 +28,38 @@
 def group_list_dictize(obj_list, context,
                        sort_key=lambda x: x['display_name'], reverse=False,
                        with_package_counts=True,
-                       include_groups=False):
+                       include_groups=False,
+                       include_tags=False,
+                       include_extras=False):
 
     group_dictize_context = dict(context.items()[:])
     # Set options to avoid any SOLR queries for each group, which would
     # slow things further.
-    group_dictize_context.update((
-        ('include_dataset_count', with_package_counts),
-        ('dataset_count_instead_of_dicts', True),
-        ('include_groups', include_groups),
-        ('include_datasets', False),  # too slow - don't allow
-        ('include_users', False),  # too slow - don't allow
-        ))
+    group_dictize_options = {
+            'packages_field': 'dataset_count' if with_package_counts else None,
+            # don't allow packages_field='datasets' as it is too slow
+            'include_groups': include_groups,
+            'include_tags': include_tags,
+            'include_extras': include_extras,
+            'include_users': False,  # too slow - don't allow
+            }
+        #('include_dataset_count', with_package_counts),
+        #('dataset_count_instead_of_dicts', True),
+        #('include_groups', include_groups),
+        #('include_datasets', False),  # too slow - don't allow
+        #('include_users', False),  # too slow - don't allow
+        #))
     if with_package_counts and 'dataset_counts' not in group_dictize_context:
-        group_dictize_context['dataset_counts'] = _get_group_package_counts()
+        # 'dataset_counts' will already be in the context in the case that
+        # group_list_dictize recurses via group_dictize (groups in groups)
+        group_dictize_context['dataset_counts'] = get_group_dataset_counts()
     if context.get('with_capacity'):
         group_list = [group_dictize(group, group_dictize_context,
-                                    capacity=capacity)
+                                    capacity=capacity, **group_dictize_options)
                       for group, capacity in obj_list]
     else:
-        group_list = [group_dictize(group, group_dictize_context)
+        group_list = [group_dictize(group, group_dictize_context,
+                                    **group_dictize_options)
                       for group in obj_list]
 
     return sorted(group_list, key=sort_key, reverse=reverse)
@@ -226,10 +249,8 @@ def package_dictize(pkg, context):
     context['with_capacity'] = False
     ## no package counts as cannot fetch from search index at the same
     ## time as indexing to it.
-    group_list_dictize_context = dict(context.items()[:])
-    group_list_dictize_context['include_extras'] = False  # for speed
-    result_dict["groups"] = group_list_dictize(result,
-                                               group_list_dictize_context,
+    ## tags, extras and sub-groups are not included for speed
+    result_dict["groups"] = group_list_dictize(result, context,
                                                with_package_counts=False)
     #owning organization
     group_rev = model.group_revision_table
@@ -295,27 +316,41 @@ def _get_members(context, group, member_type):
     return q.all()
 
 
-def _get_group_package_counts():
-    '''For all public groups, return their package counts, as a SOLR facet'''
+def get_group_dataset_counts():
+    '''For all public groups, return their dataset counts, as a SOLR facet'''
     query = search.PackageSearchQuery()
     q = {'q': '+capacity:public',
-            'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
-            'facet.limit': -1, 'rows': 1}
+         'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
+         'facet.limit': -1, 'rows': 1}
     query.run(q)
     return query.facets
 
 
-def group_dictize(group, context, **kw):
+def group_dictize(group, context,
+                  include_groups=True,
+                  include_tags=True,
+                  include_users=True,
+                  include_extras=True,
+                  packages_field='datasets',
+                  **kw):
+    '''
+    Turns a Group object and related into a dictionary. The related objects
+    like tags are included unless you specify it in the params.
+
+    :param packages_field: determines the format of the `packages` field - can
+    be `datasets`, `dataset_count`, `none_but_include_package_count` or None.
+    If set to `dataset_count` or `none_but_include_package_count` then you
+    can precalculate dataset counts in advance by supplying:
+    context['dataset_counts'] = get_group_dataset_counts()
+    '''
+    assert packages_field in ('datasets', 'dataset_count',
+                              'none_but_include_package_count', None)
+    if packages_field in ('dataset_count', 'none_but_include_package_count'):
+        dataset_counts = context.get('dataset_counts', None)
+
     result_dict = d.table_dictize(group, context)
     result_dict.update(kw)
 
-    include_datasets = context.get('include_datasets', True)
-    include_dataset_count = context.get('include_dataset_count', True)
-    include_groups = context.get('include_groups', True)
-    include_tags = context.get('include_tags', True)
-    include_users = context.get('include_users', True)
-    include_extras = context.get('include_extras', True)
-
     result_dict['display_name'] = group.title or group.name
 
     if include_extras:
@@ -324,77 +359,67 @@ def group_dictize(group, context, **kw):
 
     context['with_capacity'] = True
 
-    if include_datasets or include_dataset_count:
-        # group_list traditionally returned count instead of the dicts. It is
-        # deprecated, but this behaviour is maintained for now by setting:
-        # dataset_count_instead_of_dicts = True
-        # dataset_counts = _get_group_package_counts()
-        dataset_count_instead_of_dicts = context.get('dataset_count_instead_of_dicts', False)
-        dataset_counts = context.get('dataset_counts')
-
-        if dataset_count_instead_of_dicts is False or dataset_counts is None:
-            # Ask SOLR for the packages for this org/group
+    if packages_field:
+        def get_packages_for_this_group(group_):
+            # Ask SOLR for the list of packages for this org/group
             q = {
                 'facet': 'false',
                 'rows': 0,
             }
 
-            if group.is_organization:
-                q['fq'] = 'owner_org:"{0}"'.format(group.id)
+            if group_.is_organization:
+                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
             else:
-                q['fq'] = 'groups:"{0}"'.format(group.name)
+                q['fq'] = 'groups:"{0}"'.format(group_.name)
 
             is_group_member = (context.get('user') and
-                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
+                new_authz.has_user_permission_for_group_or_org(group_.id, context.get('user'), 'read'))
             if is_group_member:
                 context['ignore_capacity_check'] = True
 
-            if include_datasets and not context.get('for_view'):
+            if not context.get('for_view'):
                 q['rows'] = 1000    # Only the first 1000 datasets are returned
 
             search_context = dict((k, v) for (k, v) in context.items() if k != 'schema')
             search_results = logic.get_action('package_search')(search_context, q)
-            package_count = search_results['count']
+            return search_results['count'], search_results['results']
+        if packages_field == 'datasets':
+            package_count, packages = get_packages_for_this_group(group)
+            result_dict['packages'] = packages
         else:
-            # Use the package_counts passed in
-            facets = dataset_counts
-            if group.is_organization:
-                package_count = facets['owner_org'].get(group.id, 0)
+            # i.e. packages_field is 'dataset_count' or
+            # 'none_but_include_package_count'
+            if dataset_counts is None:
+                package_count, packages = get_packages_for_this_group(group)
             else:
-                package_count = facets['groups'].get(group.name, 0)
-
-        if dataset_count_instead_of_dicts:
-            result_dict['packages'] = package_count
-        elif include_datasets:
-            result_dict['packages'] = search_results['results']
+                # Use the pre-calculated package_counts passed in.
+                facets = dataset_counts
+                if group.is_organization:
+                    package_count = facets['owner_org'].get(group.id, 0)
+                else:
+                    package_count = facets['groups'].get(group.name, 0)
+            if packages_field != 'none_but_include_package_count':
+                result_dict['packages'] = package_count
 
         result_dict['package_count'] = package_count
 
     if include_tags:
+        # group tags are not creatable via the API yet, but that was(/is) a
+        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
         result_dict['tags'] = tag_list_dictize(
             _get_members(context, group, 'tags'),
             context)
 
     if include_groups:
-        group_list_context = dict(context.items()[:])
-        group_list_context.update((
-            ('include_tags', False),
-            ('include_users', False),
-            ))
+        # these sub-groups won't have tags or extras for speed
         result_dict['groups'] = group_list_dictize(
-            _get_members(group_list_context, group, 'groups'),
-            context)
+            _get_members(context, group, 'groups'),
+            context, include_groups=True)
 
-    import pdb; pdb.set_trace()
     if include_users:
-        if 'is_group_member' not in dir():
-            # expensive call, so avoid if was done earlier in this method
-            is_group_member = (context.get('user') and
-                new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read'))
-        if is_group_member:
-            result_dict['users'] = user_list_dictize(
-                _get_members(context, group, 'users'),
-                context)
+        result_dict['users'] = user_list_dictize(
+            _get_members(context, group, 'users'),
+            context)
 
     context['with_capacity'] = False
 

diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py
@@ -388,10 +388,11 @@ def _group_or_org_list(context, data_dict, is_org=False):
                                total=1)
 
     all_fields = data_dict.get('all_fields', None)
-    include_extras = asbool(data_dict.get('include_extras', False))
+    include_extras = all_fields and \
+                     asbool(data_dict.get('include_extras', False))
 
     query = model.Session.query(model.Group).join(model.GroupRevision)
-    if all_fields and include_extras:
+    if include_extras:
         # this does an eager load of the extras, avoiding an sql query every
         # time group_list_dictize accesses a group's extra.
         query = query.options(sqlalchemy.orm.joinedload(model.Group._extras))
@@ -410,23 +411,21 @@ def _group_or_org_list(context, data_dict, is_org=False):
     query = query.filter(model.GroupRevision.is_organization == is_org)
 
     groups = query.all()
-    group_list_context = dict(context.items()[:])
-    if not all_fields:
-        group_list_context.update((
-            ('include_tags', False),
-            ('include_extras', False),
-            ))
+    if all_fields:
+        include_tags = asbool(data_dict.get('include_tags', False))
     else:
-        group_list_context.update((
-            ('include_tags', asbool(data_dict.get('include_tags', False))),
-            ('include_extras', include_extras),
-            ))
+        include_tags = False
+    # even if we are not going to return all_fields, we need to dictize all the
+    # groups so that we can sort by any field.
     group_list = model_dictize.group_list_dictize(
-        groups, group_list_context,
+        groups, context,
         sort_key=lambda x: x[sort_info[0][0]],
         reverse=sort_info[0][1] == 'desc',
         with_package_counts=all_fields or sort_info[0][0] == 'packages',
-        include_groups=asbool(data_dict.get('include_groups', False)))
+        include_groups=asbool(data_dict.get('include_groups', False)),
+        include_tags=include_tags,
+        include_extras=include_extras,
+        )
 
     if not all_fields:
         group_list = [group[ref_group_by] for group in group_list]
@@ -1059,7 +1058,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
     include_datasets = data_dict.get('include_datasets', True)
     if isinstance(include_datasets, basestring):
         include_datasets = (include_datasets.lower() in ('true', '1'))
-    context['include_datasets'] = include_datasets
+    packages_field = 'datasets' if include_datasets \
+                     else 'none_but_include_package_count'
 
     if group is None:
         raise NotFound
@@ -1073,7 +1073,8 @@ def _group_or_org_show(context, data_dict, is_org=False):
     else:
         _check_access('group_show', context, data_dict)
 
-    group_dict = model_dictize.group_dictize(group, context)
+    group_dict = model_dictize.group_dictize(group, context,
+                                             packages_field=packages_field)
 
     if is_org:
         plugin_type = plugins.IOrganizationController