Skip to content

Commit

Permalink
Don't dictize packages when dictizing a vocab
Browse files Browse the repository at this point in the history
When dictizing a vocabulary, don't dictize every package of every one of
the vocab's tags. This makes vocabulary_dictize and vocabulary_show much
faster, which makes a big difference when creating and updating datasets
that have some custom fields that use tag vocabularies.
  • Loading branch information
seanh committed Aug 18, 2014
1 parent e3d9d5d commit d9a2ef6
Showing 1 changed file with 30 additions and 18 deletions.
48 changes: 30 additions & 18 deletions ckan/lib/dictization/model_dictize.py
Expand Up @@ -447,22 +447,26 @@ def tag_list_dictize(tag_list, context):

def tag_dictize(tag, context):
tag_dict = d.table_dictize(tag, context)
query = search.PackageSearchQuery()

tag_query = u'+capacity:public '
vocab_id = tag_dict.get('vocabulary_id')
include_datasets = context.get('include_datasets', True)

if vocab_id:
model = context['model']
vocab = model.Vocabulary.get(vocab_id)
tag_query += u'+vocab_{0}:"{1}"'.format(vocab.name, tag.name)
else:
tag_query += u'+tags:"{0}"'.format(tag.name)
if include_datasets:
query = search.PackageSearchQuery()

q = {'q': tag_query, 'fl': 'data_dict', 'wt': 'json', 'rows': 1000}
tag_query = u'+capacity:public '
vocab_id = tag_dict.get('vocabulary_id')

package_dicts = [h.json.loads(result['data_dict'])
for result in query.run(q)['results']]
if vocab_id:
model = context['model']
vocab = model.Vocabulary.get(vocab_id)
tag_query += u'+vocab_{0}:"{1}"'.format(vocab.name, tag.name)
else:
tag_query += u'+tags:"{0}"'.format(tag.name)

q = {'q': tag_query, 'fl': 'data_dict', 'wt': 'json', 'rows': 1000}

package_dicts = [h.json.loads(result['data_dict'])
for result in query.run(q)['results']]

# Add display_names to tags. At first a tag's display_name is just the
# same as its name, but the display_name might get changed later (e.g.
Expand All @@ -474,13 +478,15 @@ def tag_dictize(tag, context):
for item in plugins.PluginImplementations(plugins.ITagController):
tag_dict = item.before_view(tag_dict)

tag_dict['packages'] = []
for package_dict in package_dicts:
for item in plugins.PluginImplementations(plugins.IPackageController):
package_dict = item.before_view(package_dict)
tag_dict['packages'].append(package_dict)
if include_datasets:
tag_dict['packages'] = []
for package_dict in package_dicts:
for item in plugins.PluginImplementations(plugins.IPackageController):
package_dict = item.before_view(package_dict)
tag_dict['packages'].append(package_dict)
else:
tag_dict['packages'] = package_dicts
if include_datasets:
tag_dict['packages'] = package_dicts

return tag_dict

Expand Down Expand Up @@ -645,6 +651,12 @@ def make_api_2(package_id):
def vocabulary_dictize(vocabulary, context):
    """Return a dictionary representation of ``vocabulary`` with its tags.

    The vocabulary itself is dictized with ``d.table_dictize`` and every tag
    in ``vocabulary.tags`` is dictized with ``tag_dictize``; the resulting
    tag dicts are stored as a list under the ``'tags'`` key.

    :param vocabulary: the vocabulary object to dictize; it must expose a
        ``tags`` iterable
    :param context: the dictization context dict. If it has no
        ``'include_datasets'`` key, the key is added with the value
        ``False`` (note that this modifies the caller's dict); a value
        already supplied by the caller is left untouched.
    :returns: the vocabulary dict, including a ``'tags'`` list
    """
    vocabulary_dict = d.table_dictize(vocabulary, context)
    # The 'tags' key is filled in below, so the table dictization must not
    # have produced one already. ``in`` is used rather than ``has_key``
    # because ``dict.has_key`` does not exist on Python 3.
    assert 'tags' not in vocabulary_dict

    # We don't want to include each tag's list of datasets in the tag dict by
    # default because it's slow.
    context.setdefault('include_datasets', False)

    vocabulary_dict['tags'] = [tag_dictize(tag, context)
                               for tag in vocabulary.tags]
    return vocabulary_dict
Expand Down

0 comments on commit d9a2ef6

Please sign in to comment.