From bab3b109a27713b68752187fb14bc3906909696e Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Tue, 7 Feb 2017 16:37:52 -0500 Subject: [PATCH 1/2] Query performance improvements. Removes a forced order_by on revisions that causes poor query performance on many other table queries. Replaced `count()` queries on number_of_edits and number_of_packages with vastly improved queries. SQLAlchemy uses subqueries for `count()`, along with an odd default unindexed order_by on the table mapper causing even trivial counts to perform unindexed table scans. This change reduces the average query time for a user with 500k revisions from 15 *seconds* to under 20ms. --- ckan/model/resource.py | 1 - ckan/model/user.py | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/ckan/model/resource.py b/ckan/model/resource.py index 03a8b512015..49d8f81e412 100644 --- a/ckan/model/resource.py +++ b/ckan/model/resource.py @@ -189,7 +189,6 @@ def activity_stream_detail(self, activity_id, activity_type): ), ) }, -order_by=[resource_table.c.package_id], extension=[vdm.sqlalchemy.Revisioner(resource_revision_table), extension.PluginMapperExtension(), ], diff --git a/ckan/model/user.py b/ckan/model/user.py index 7a4c111caa1..e46a58127b9 100644 --- a/ckan/model/user.py +++ b/ckan/model/user.py @@ -9,7 +9,7 @@ from passlib.hash import pbkdf2_sha512 from sqlalchemy.sql.expression import or_ from sqlalchemy.orm import synonym -from sqlalchemy import types, Column, Table +from sqlalchemy import types, Column, Table, func import vdm.sqlalchemy import meta @@ -194,21 +194,45 @@ def as_dict(self): def number_of_edits(self): # have to import here to avoid circular imports import ckan.model as model - revisions_q = meta.Session.query(model.Revision) - revisions_q = revisions_q.filter_by(author=self.name) - return revisions_q.count() + + # Get count efficiently without spawning the SQLAlchemy subquery + # wrapper. Reset the VDM-forced order_by on timestamp. + return meta.Session.execute( + meta.Session.query( + model.Revision + ).filter_by( + author=self.name + ).statement.with_only_columns( + [func.count()] + ).order_by( + None + ) + ).scalar() def number_created_packages(self, include_private_and_draft=False): # have to import here to avoid circular imports import ckan.model as model - q = meta.Session.query(model.Package)\ - .filter_by(creator_user_id=self.id) + + # Get count efficiently without spawning the SQLAlchemy subquery + # wrapper. Reset the VDM-forced order_by on timestamp. + q = meta.Session.query( + model.Package + ).filter_by( + creator_user_id=self.id + ) + if include_private_and_draft: q = q.filter(model.Package.state != 'deleted') else: - q = q.filter_by(state='active')\ - .filter_by(private=False) - return q.count() + q = q.filter_by(state='active', private=False) + + return meta.Session.execute( + q.statement.with_only_columns( + [func.count()] + ).order_by( + None + ) + ).scalar() def activate(self): ''' Activate the user ''' From 5d6949ca7602eb5039227229a0113269d213f856 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Mon, 13 Feb 2017 13:33:05 -0500 Subject: [PATCH 2/2] Move an expensive, repetitive check_access outside of the resource_item.html loop. --- ckan/templates/package/resources.html | 3 ++- ckan/templates/package/snippets/resource_item.html | 1 - ckan/templates/package/snippets/resources_list.html | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ckan/templates/package/resources.html b/ckan/templates/package/resources.html index 0e02b3846dd..dcb708e6d40 100644 --- a/ckan/templates/package/resources.html +++ b/ckan/templates/package/resources.html @@ -11,8 +11,9 @@ {% block primary_content_inner %} {% if pkg.resources %} {% else %} diff --git a/ckan/templates/package/snippets/resource_item.html b/ckan/templates/package/snippets/resource_item.html index e38f3e1d50f..8e437da6f7d 100644 --- a/ckan/templates/package/snippets/resource_item.html +++ b/ckan/templates/package/snippets/resource_item.html @@ -1,4 +1,3 @@ -{% set can_edit = h.check_access('package_update', {'id':pkg.id }) %} {% set url_action = 'resource_edit' if url_is_edit and can_edit else 'resource_read' %} {% set url = h.url_for(controller='package', action=url_action, id=pkg.name, resource_id=res.id) %} diff --git a/ckan/templates/package/snippets/resources_list.html b/ckan/templates/package/snippets/resources_list.html index 5fdd853a458..140df8fb627 100644 --- a/ckan/templates/package/snippets/resources_list.html +++ b/ckan/templates/package/snippets/resources_list.html @@ -15,8 +15,9 @@

{{ _('Data and Resources') }}

{% if resources %}