diff --git a/ckan/lib/dictization/model_dictize.py b/ckan/lib/dictization/model_dictize.py index 0ff85e3295f..8e346d11286 100644 --- a/ckan/lib/dictization/model_dictize.py +++ b/ckan/lib/dictization/model_dictize.py @@ -1,3 +1,14 @@ +''' +These dictize functions generally take a domain object (such as Package) and +convert it to a dictionary, including related objects (e.g. for Package it +includes PackageTags, PackageExtras, PackageGroup etc). + +The basic recipe is to call: + + dictized = ckan.lib.dictization.table_dictize(domain_object) + +which builds the dictionary by iterating over the table columns. +''' import datetime import urlparse @@ -17,26 +28,38 @@ def group_list_dictize(obj_list, context, sort_key=lambda x: x['display_name'], reverse=False, with_package_counts=True, - include_groups=False): + include_groups=False, + include_tags=False, + include_extras=False): group_dictize_context = dict(context.items()[:]) # Set options to avoid any SOLR queries for each group, which would # slow things further. 
- group_dictize_context.update(( - ('include_dataset_count', with_package_counts), - ('dataset_count_instead_of_dicts', True), - ('include_groups', include_groups), - ('include_datasets', False), # too slow - don't allow - ('include_users', False), # too slow - don't allow - )) + group_dictize_options = { + 'packages_field': 'dataset_count' if with_package_counts else None, + # don't allow packages_field='datasets' as it is too slow + 'include_groups': include_groups, + 'include_tags': include_tags, + 'include_extras': include_extras, + 'include_users': False, # too slow - don't allow + } + #('include_dataset_count', with_package_counts), + #('dataset_count_instead_of_dicts', True), + #('include_groups', include_groups), + #('include_datasets', False), # too slow - don't allow + #('include_users', False), # too slow - don't allow + #)) if with_package_counts and 'dataset_counts' not in group_dictize_context: - group_dictize_context['dataset_counts'] = _get_group_package_counts() + # 'dataset_counts' will already be in the context in the case that + # group_list_dictize recurses via group_dictize (groups in groups) + group_dictize_context['dataset_counts'] = get_group_dataset_counts() if context.get('with_capacity'): group_list = [group_dictize(group, group_dictize_context, - capacity=capacity) + capacity=capacity, **group_dictize_options) for group, capacity in obj_list] else: - group_list = [group_dictize(group, group_dictize_context) + group_list = [group_dictize(group, group_dictize_context, + **group_dictize_options) for group in obj_list] return sorted(group_list, key=sort_key, reverse=reverse) @@ -226,10 +249,8 @@ def package_dictize(pkg, context): context['with_capacity'] = False ## no package counts as cannot fetch from search index at the same ## time as indexing to it. 
- group_list_dictize_context = dict(context.items()[:]) - group_list_dictize_context['include_extras'] = False # for speed - result_dict["groups"] = group_list_dictize(result, - group_list_dictize_context, + ## tags, extras and sub-groups are not included for speed + result_dict["groups"] = group_list_dictize(result, context, with_package_counts=False) #owning organization group_rev = model.group_revision_table @@ -295,27 +316,41 @@ def _get_members(context, group, member_type): return q.all() -def _get_group_package_counts(): - '''For all public groups, return their package counts, as a SOLR facet''' +def get_group_dataset_counts(): + '''For all public groups, return their dataset counts, as a SOLR facet''' query = search.PackageSearchQuery() q = {'q': '+capacity:public', - 'fl': 'groups', 'facet.field': ['groups', 'owner_org'], - 'facet.limit': -1, 'rows': 1} + 'fl': 'groups', 'facet.field': ['groups', 'owner_org'], + 'facet.limit': -1, 'rows': 1} query.run(q) return query.facets -def group_dictize(group, context, **kw): +def group_dictize(group, context, + include_groups=True, + include_tags=True, + include_users=True, + include_extras=True, + packages_field='datasets', + **kw): + ''' + Turns a Group object and related into a dictionary. The related objects + like tags are included unless you specify it in the params. + + :param packages_field: determines the format of the `packages` field - can + be `datasets`, `dataset_count`, `none_but_include_package_count` or None. 
+ If set to `dataset_count` or `none_but_include_package_count` then you + can precalculate dataset counts in advance by supplying: + context['dataset_counts'] = get_group_dataset_counts() + ''' + assert packages_field in ('datasets', 'dataset_count', + 'none_but_include_package_count', None) + if packages_field in ('dataset_count', 'none_but_include_package_count'): + dataset_counts = context.get('dataset_counts', None) + result_dict = d.table_dictize(group, context) result_dict.update(kw) - include_datasets = context.get('include_datasets', True) - include_dataset_count = context.get('include_dataset_count', True) - include_groups = context.get('include_groups', True) - include_tags = context.get('include_tags', True) - include_users = context.get('include_users', True) - include_extras = context.get('include_extras', True) - result_dict['display_name'] = group.title or group.name if include_extras: @@ -324,77 +359,67 @@ def group_dictize(group, context, **kw): context['with_capacity'] = True - if include_datasets or include_dataset_count: - # group_list traditionally returned count instead of the dicts. 
It is - # deprecated, but this behaviour is maintained for now by setting: - # dataset_count_instead_of_dicts = True - # dataset_counts = _get_group_package_counts() - dataset_count_instead_of_dicts = context.get('dataset_count_instead_of_dicts', False) - dataset_counts = context.get('dataset_counts') - - if dataset_count_instead_of_dicts is False or dataset_counts is None: - # Ask SOLR for the packages for this org/group + if packages_field: + def get_packages_for_this_group(group_): + # Ask SOLR for the list of packages for this org/group q = { 'facet': 'false', 'rows': 0, } - if group.is_organization: - q['fq'] = 'owner_org:"{0}"'.format(group.id) + if group_.is_organization: + q['fq'] = 'owner_org:"{0}"'.format(group_.id) else: - q['fq'] = 'groups:"{0}"'.format(group.name) + q['fq'] = 'groups:"{0}"'.format(group_.name) is_group_member = (context.get('user') and - new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read')) + new_authz.has_user_permission_for_group_or_org(group_.id, context.get('user'), 'read')) if is_group_member: context['ignore_capacity_check'] = True - if include_datasets and not context.get('for_view'): + if not context.get('for_view'): q['rows'] = 1000 # Only the first 1000 datasets are returned search_context = dict((k, v) for (k, v) in context.items() if k != 'schema') search_results = logic.get_action('package_search')(search_context, q) - package_count = search_results['count'] + return search_results['count'], search_results['results'] + if packages_field == 'datasets': + package_count, packages = get_packages_for_this_group(group) + result_dict['packages'] = packages else: - # Use the package_counts passed in - facets = dataset_counts - if group.is_organization: - package_count = facets['owner_org'].get(group.id, 0) + # i.e. 
packages_field is 'dataset_count' or + # 'none_but_include_package_count' + if dataset_counts is None: + package_count, packages = get_packages_for_this_group(group) else: - package_count = facets['groups'].get(group.name, 0) - - if dataset_count_instead_of_dicts: - result_dict['packages'] = package_count - elif include_datasets: - result_dict['packages'] = search_results['results'] + # Use the pre-calculated package_counts passed in. + facets = dataset_counts + if group.is_organization: + package_count = facets['owner_org'].get(group.id, 0) + else: + package_count = facets['groups'].get(group.name, 0) + if packages_field != 'none_but_include_package_count': + result_dict['packages'] = package_count result_dict['package_count'] = package_count if include_tags: + # group tags are not creatable via the API yet, but that was(/is) a + # future intention (see kindly's commit 5c8df894 on 2011/12/23) result_dict['tags'] = tag_list_dictize( _get_members(context, group, 'tags'), context) if include_groups: - group_list_context = dict(context.items()[:]) - group_list_context.update(( - ('include_tags', False), - ('include_users', False), - )) + # these sub-groups won't have tags or extras for speed result_dict['groups'] = group_list_dictize( - _get_members(group_list_context, group, 'groups'), - context) + _get_members(context, group, 'groups'), + context, include_groups=True) - import pdb; pdb.set_trace() if include_users: - if 'is_group_member' not in dir(): - # expensive call, so avoid if was done earlier in this method - is_group_member = (context.get('user') and - new_authz.has_user_permission_for_group_or_org(group.id, context.get('user'), 'read')) - if is_group_member: - result_dict['users'] = user_list_dictize( - _get_members(context, group, 'users'), - context) + result_dict['users'] = user_list_dictize( + _get_members(context, group, 'users'), + context) context['with_capacity'] = False diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index 
35927492606..d6fa46d5a3f 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -388,10 +388,11 @@ def _group_or_org_list(context, data_dict, is_org=False): total=1) all_fields = data_dict.get('all_fields', None) - include_extras = asbool(data_dict.get('include_extras', False)) + include_extras = all_fields and \ + asbool(data_dict.get('include_extras', False)) query = model.Session.query(model.Group).join(model.GroupRevision) - if all_fields and include_extras: + if include_extras: # this does an eager load of the extras, avoiding an sql query every # time group_list_dictize accesses a group's extra. query = query.options(sqlalchemy.orm.joinedload(model.Group._extras)) @@ -410,23 +411,21 @@ def _group_or_org_list(context, data_dict, is_org=False): query = query.filter(model.GroupRevision.is_organization == is_org) groups = query.all() - group_list_context = dict(context.items()[:]) - if not all_fields: - group_list_context.update(( - ('include_tags', False), - ('include_extras', False), - )) + if all_fields: + include_tags = asbool(data_dict.get('include_tags', False)) else: - group_list_context.update(( - ('include_tags', asbool(data_dict.get('include_tags', False))), - ('include_extras', include_extras), - )) + include_tags = False + # even if we are not going to return all_fields, we need to dictize all the + # groups so that we can sort by any field. 
group_list = model_dictize.group_list_dictize( - groups, group_list_context, + groups, context, sort_key=lambda x: x[sort_info[0][0]], reverse=sort_info[0][1] == 'desc', with_package_counts=all_fields or sort_info[0][0] == 'packages', - include_groups=asbool(data_dict.get('include_groups', False))) + include_groups=asbool(data_dict.get('include_groups', False)), + include_tags=include_tags, + include_extras=include_extras, + ) if not all_fields: group_list = [group[ref_group_by] for group in group_list] @@ -1059,7 +1058,8 @@ def _group_or_org_show(context, data_dict, is_org=False): include_datasets = data_dict.get('include_datasets', True) if isinstance(include_datasets, basestring): include_datasets = (include_datasets.lower() in ('true', '1')) - context['include_datasets'] = include_datasets + packages_field = 'datasets' if include_datasets \ + else 'none_but_include_package_count' if group is None: raise NotFound @@ -1073,7 +1073,8 @@ def _group_or_org_show(context, data_dict, is_org=False): else: _check_access('group_show', context, data_dict) - group_dict = model_dictize.group_dictize(group, context) + group_dict = model_dictize.group_dictize(group, context, + packages_field=packages_field) if is_org: plugin_type = plugins.IOrganizationController diff --git a/ckan/new_tests/lib/dictization/test_model_dictize.py b/ckan/new_tests/lib/dictization/test_model_dictize.py new file mode 100644 index 00000000000..9f45a402d68 --- /dev/null +++ b/ckan/new_tests/lib/dictization/test_model_dictize.py @@ -0,0 +1,206 @@ +from nose.tools import assert_equal + +from ckan.lib.dictization import model_dictize +from ckan import model +from ckan.lib import search + +from ckan.new_tests import helpers, factories + + +class TestDictize: + + def setup(self): + helpers.reset_db() + search.clear() + + def test_group_list_dictize(self): + group = factories.Group() + group_list = model.Session.query(model.Group).filter_by().all() + context = {'model': model, 'session': model.Session} + + 
group_dicts = model_dictize.group_list_dictize(group_list, context) + + assert_equal(len(group_dicts), 1) + assert_equal(group_dicts[0]['name'], group['name']) + assert_equal(group_dicts[0]['packages'], 0) + assert 'extras' not in group_dicts[0] + assert 'tags' not in group_dicts[0] + assert 'groups' not in group_dicts[0] + + def test_group_list_dictize_sorted(self): + factories.Group(name='aa') + factories.Group(name='bb') + group_list = [model.Group.get(u'bb'), + model.Group.get(u'aa')] + context = {'model': model, 'session': model.Session} + + group_dicts = model_dictize.group_list_dictize(group_list, context) + + # list is resorted by name + assert_equal(group_dicts[0]['name'], 'aa') + assert_equal(group_dicts[1]['name'], 'bb') + + def test_group_list_dictize_reverse_sorted(self): + factories.Group(name='aa') + factories.Group(name='bb') + group_list = [model.Group.get(u'aa'), + model.Group.get(u'bb')] + context = {'model': model, 'session': model.Session} + + group_dicts = model_dictize.group_list_dictize(group_list, context, + reverse=True) + + assert_equal(group_dicts[0]['name'], 'bb') + assert_equal(group_dicts[1]['name'], 'aa') + + def test_group_list_dictize_without_package_count(self): + group = factories.Group() + factories.Dataset(group=group['name']) + group_list = [model.Group.get(group['name'])] + context = {'model': model, 'session': model.Session} + + group_dicts = model_dictize.group_list_dictize( + group_list, context, with_package_counts=False) + + assert 'packages' not in group_dicts[0] + + def test_group_list_dictize_including_extras(self): + factories.Group(extras=[{'key': 'k1', 'value': 'v1'}]) + group_list = model.Session.query(model.Group).filter_by().all() + context = {'model': model, 'session': model.Session} + + group_dicts = model_dictize.group_list_dictize(group_list, context, + include_extras=True) + + assert_equal(group_dicts[0]['extras'][0]['key'], 'k1') + + def test_group_list_dictize_including_tags(self): + factories.Group() + # 
group tags aren't in the group_create schema, so it's a slightly more + # convoluted way to create them + group_obj = model.Session.query(model.Group).first() + tag = model.Tag(name='t1') + model.Session.add(tag) + model.Session.commit() + tag = model.Session.query(model.Tag).first() + group_obj = model.Session.query(model.Group).first() + member = model.Member(group=group_obj, table_id=tag.id, + table_name='tag') + model.Session.add(member) + model.repo.new_revision() + model.Session.commit() + group_list = model.Session.query(model.Group).filter_by().all() + context = {'model': model, 'session': model.Session} + + group_dicts = model_dictize.group_list_dictize(group_list, context, + include_tags=True) + + assert_equal(group_dicts[0]['tags'][0]['name'], 't1') + + def test_group_list_dictize_including_groups(self): + factories.Group(name='parent') + factories.Group(name='child', groups=[{'name': 'parent'}]) + group_list = [model.Group.get(u'parent'), model.Group.get(u'child')] + context = {'model': model, 'session': model.Session} + + child_dict, parent_dict = model_dictize.group_list_dictize( + group_list, context, include_groups=True) + + assert_equal(parent_dict['name'], 'parent') + assert_equal(child_dict['name'], 'child') + assert_equal(parent_dict['groups'], []) + assert_equal(child_dict['groups'][0]['name'], 'parent') + + def test_group_dictize(self): + group = factories.Group() + factories.Dataset(group=group['name']) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + + group = model_dictize.group_dictize(group_obj, context) + + assert_equal(group['name'], 'test_group_0') + assert_equal(group['packages'], []) + assert_equal(group['extras'], []) + assert_equal(group['tags'], []) + assert_equal(group['groups'], []) + + def test_group_dictize_without_packages(self): + # group_list_dictize might not be interested in packages at all + # so sets these options. e.g. 
it is not all_fields nor are the results + # sorted by the number of packages. + factories.Group() + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + + group = model_dictize.group_dictize(group_obj, context, + packages_field=None) + + assert 'packages' not in group + + def test_group_dictize_with_package_list(self): + group_ = factories.Group() + package = factories.Dataset(groups=[{'name': group_['name']}]) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + + group = model_dictize.group_dictize(group_obj, context) + + assert_equal(type(group['packages']), list) + assert_equal(len(group['packages']), 1) + assert_equal(group['packages'][0]['name'], package['name']) + + def test_group_dictize_with_package_count(self): + # group_list_dictize calls it like this by default + group_ = factories.Group() + factories.Dataset(groups=[{'name': group_['name']}]) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session, + 'dataset_counts': model_dictize.get_group_dataset_counts() + } + + group = model_dictize.group_dictize(group_obj, context, + packages_field='dataset_count') + + assert_equal(group['packages'], 1) + assert_equal(group['package_count'], 1) + + def test_group_dictize_with_no_packages_field_but_still_package_count(self): + # logic.get.group_show calls it like this when not include_datasets + group_ = factories.Group() + factories.Dataset(groups=[{'name': group_['name']}]) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + # not supplying dataset_counts in this case either + + group = model_dictize.group_dictize(group_obj, context, + packages_field='none_but_include_package_count') + + assert 'packages' not in group + assert_equal(group['package_count'], 1) + + def 
test_group_dictize_for_org_with_package_list(self): + org_ = factories.Organization() + package = factories.Dataset(owner_org=org_['id']) + group_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session} + + org = model_dictize.group_dictize(group_obj, context) + + assert_equal(type(org['packages']), list) + assert_equal(len(org['packages']), 1) + assert_equal(org['packages'][0]['name'], package['name']) + + def test_group_dictize_for_org_with_package_count(self): + # group_list_dictize calls it like this by default + org_ = factories.Organization() + factories.Dataset(owner_org=org_['id']) + org_obj = model.Session.query(model.Group).filter_by().first() + context = {'model': model, 'session': model.Session, + 'dataset_counts': model_dictize.get_group_dataset_counts() + } + + org = model_dictize.group_dictize(org_obj, context, + packages_field='dataset_count') + + assert_equal(org['packages'], 1) diff --git a/ckan/new_tests/logic/action/test_get.py b/ckan/new_tests/logic/action/test_get.py index 816004aab42..cf523586b23 100644 --- a/ckan/new_tests/logic/action/test_get.py +++ b/ckan/new_tests/logic/action/test_get.py @@ -11,30 +11,74 @@ class TestGet(object): - @classmethod - def setup_class(cls): - helpers.reset_db() - def setup(self): - import ckan.model as model - - # Reset the db before each test method. 
- model.repo.rebuild_db() + helpers.reset_db() # Clear the search index search.clear() def test_group_list(self): - user = factories.User() - group1 = factories.Group(user=user) - group2 = factories.Group(user=user) + group1 = factories.Group() + group2 = factories.Group() group_list = helpers.call_action('group_list') assert (sorted(group_list) == sorted([g['name'] for g in [group1, group2]])) + def test_group_list_all_fields(self): + + group = factories.Group() + + group_list = helpers.call_action('group_list', all_fields=True) + + expected_group = dict(group.items()[:]) + for field in ('users', 'tags', 'extras', 'groups'): + del expected_group[field] + expected_group['packages'] = 0 + assert group_list[0] == expected_group + assert 'extras' not in group_list[0] + assert 'tags' not in group_list[0] + assert 'groups' not in group_list[0] + assert 'users' not in group_list[0] + assert 'datasets' not in group_list[0] + + def test_group_list_extras_returned(self): + + group = factories.Group(extras=[{'key': 'key1', 'value': 'val1'}]) + + group_list = helpers.call_action('group_list', all_fields=True, + include_extras=True) + + eq(group_list[0]['extras'], group['extras']) + eq(group_list[0]['extras'][0]['key'], 'key1') + + # NB there is no test_group_list_tags_returned because tags are not in the + # group_create schema (yet) + + def test_group_list_groups_returned(self): + + parent_group = factories.Group(tags=[{'name': 'river'}]) + child_group = factories.Group(groups=[{'name': parent_group['name']}], + tags=[{'name': 'river'}]) + + group_list = helpers.call_action('group_list', all_fields=True, + include_groups=True) + + child_group_returned = group_list[0] + if group_list[0]['name'] == child_group['name']: + child_group_returned, parent_group_returned = group_list + else: + child_group_returned, parent_group_returned = group_list[::-1] + expected_parent_group = dict(parent_group.items()[:]) + for field in ('users', 'tags', 'extras'): + del 
expected_parent_group[field] + expected_parent_group['capacity'] = u'public' + expected_parent_group['packages'] = 0 + expected_parent_group['package_count'] = 0 + eq(child_group_returned['groups'], [expected_parent_group]) + def test_group_show(self): group = factories.Group(user=factories.User())