Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] [#1078] package_show performance improvements #1079

Closed
wants to merge 8 commits into from
7 changes: 6 additions & 1 deletion ckan/controllers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
'text': 'text/plain;charset=utf-8',
'html': 'text/html;charset=utf-8',
'json': 'application/json;charset=utf-8',
'json_string': 'application/json;charset=utf-8',
}


Expand Down Expand Up @@ -161,7 +162,7 @@ def action(self, logic_function, ver=None):
_('Action name not known: %s') % logic_function)

context = {'model': model, 'session': model.Session, 'user': c.user,
'api_version': ver}
'api_version': ver, 'json_string': True}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per the email, I'd prefer "return_type": "json", with the implicit default being "dict".

model.Session()._context = context
return_dict = {'help': function.__doc__}
try:
Expand All @@ -185,6 +186,10 @@ def action(self, logic_function, ver=None):
try:
result = function(context, request_data)
return_dict['success'] = True
if context.get('json_string_returned', False):
return_dict['result'] = 395108 # magic placeholder
return self._finish_ok(h.json.dumps(
return_dict).replace('395108', result), 'json_string')
return_dict['result'] = result
except DataError, e:
log.error('Format incorrect: %s - %s' % (e.error, request_data))
Expand Down
15 changes: 14 additions & 1 deletion ckan/lib/search/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@
from ckan.plugins import (PluginImplementations,
IPackageController)
import ckan.logic as logic
import ckan.lib.plugins as lib_plugins
import ckan.lib.navl.dictization_functions

log = logging.getLogger(__name__)

_validate = ckan.lib.navl.dictization_functions.validate

TYPE_FIELD = "entity_type"
PACKAGE_TYPE = "package"
KEY_CHARS = string.digits + string.letters + "_-"
Expand Down Expand Up @@ -102,7 +106,16 @@ def update_dict(self, pkg_dict, defer_commit=False):
def index_package(self, pkg_dict, defer_commit=False):
if pkg_dict is None:
return
pkg_dict['data_dict'] = json.dumps(pkg_dict)

# store complete, show_package_schema-validated version
package_plugin = lib_plugins.lookup_package_plugin(pkg_dict['type'])
schema = package_plugin.show_package_schema()
context = {'model': model, 'session': model.Session}
validated_pkg, errors = _validate(pkg_dict, schema, context)
for item in PluginImplementations(IPackageController):
item.after_show(context, validated_pkg)

pkg_dict['data_dict'] = json.dumps(validated_pkg)

# add to string field for sorting
title = pkg_dict.get('title')
Expand Down
35 changes: 24 additions & 11 deletions ckan/logic/action/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import json
import datetime
import socket

from pylons import config
import sqlalchemy
Expand Down Expand Up @@ -752,22 +753,34 @@ def package_show(context, data_dict):

_check_access('package_show', context, data_dict)

package_dict = model_dictize.package_dictize(pkg, context)

for item in plugins.PluginImplementations(plugins.IPackageController):
item.read(pkg)

package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
if 'schema' in context:
schema = context['schema']
else:
schema = package_plugin.show_package_schema()
package_dict = None
no_cache_context = ['revision_id', 'revision_date', 'schema']
if not any(k in context for k in no_cache_context):
try:
package_dict = search.show(name_or_id)['data_dict']
except (search.SearchError, socket.error):
pass
if not context.get('json_string', False):
package_dict = json.loads(package_dict)
else:
context['json_string_returned'] = True
if not package_dict:
package_dict = model_dictize.package_dictize(pkg, context)

if schema and context.get('validate', True):
package_dict, errors = _validate(package_dict, schema, context=context)
package_plugin = lib_plugins.lookup_package_plugin(package_dict['type'])
if 'schema' in context:
schema = context['schema']
else:
schema = package_plugin.show_package_schema()

for item in plugins.PluginImplementations(plugins.IPackageController):
item.after_show(context, package_dict)
if schema and context.get('validate', True):
package_dict, errors = _validate(package_dict, schema, context=context)

for item in plugins.PluginImplementations(plugins.IPackageController):
item.after_show(context, package_dict)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a problem. If I add or remove a plugin in my .ini, the results I see should change accordingly.

I'm not sure how we currently deal with this in the Solr index.

Unfortunately, this part may need the json -> dict -> json cycle if any IPackageControllers exist; I'm not sure how we can get around this with the current interface.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If our schemas could be serialized (using converter names instead of functions, say), then the validated data could store the hash of the schema that was used to generate it.


return package_dict

Expand Down