Skip to content

Commit

Permalink
Merge pull request #5514 from jmchilton/optimize_public_grids
Browse files Browse the repository at this point in the history
[18.01] Optimize public grid database interactions.
  • Loading branch information
martenson committed Feb 13, 2018
2 parents fd1ac05 + 80a41f3 commit ed23cc6
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 15 deletions.
24 changes: 20 additions & 4 deletions lib/galaxy/model/mapping.py
Expand Up @@ -1570,7 +1570,11 @@ def simple_mapping(model, **kwds):
backref="histories"),
ratings=relation(model.HistoryRatingAssociation,
order_by=model.HistoryRatingAssociation.table.c.id,
backref="histories")
backref="histories"),
average_rating=column_property(
select([func.avg(model.HistoryRatingAssociation.table.c.rating)]).where(model.HistoryRatingAssociation.table.c.history_id == model.History.table.c.id),
deferred=True
)
))

# Set up proxy so that
Expand Down Expand Up @@ -2184,7 +2188,11 @@ def simple_mapping(model, **kwds):
backref="stored_workflows"),
ratings=relation(model.StoredWorkflowRatingAssociation,
order_by=model.StoredWorkflowRatingAssociation.table.c.id,
backref="stored_workflows")
backref="stored_workflows"),
average_rating=column_property(
select([func.avg(model.StoredWorkflowRatingAssociation.table.c.rating)]).where(model.StoredWorkflowRatingAssociation.table.c.stored_workflow_id == model.StoredWorkflow.table.c.id),
deferred=True
)
))

# Set up proxy so that
Expand Down Expand Up @@ -2316,7 +2324,11 @@ def simple_mapping(model, **kwds):
backref="pages"),
ratings=relation(model.PageRatingAssociation,
order_by=model.PageRatingAssociation.table.c.id,
backref="pages")
backref="pages"),
average_rating=column_property(
select([func.avg(model.PageRatingAssociation.table.c.rating)]).where(model.PageRatingAssociation.table.c.page_id == model.Page.table.c.id),
deferred=True
)
))

# Set up proxy so that
Expand Down Expand Up @@ -2348,7 +2360,11 @@ def simple_mapping(model, **kwds):
backref="visualizations"),
ratings=relation(model.VisualizationRatingAssociation,
order_by=model.VisualizationRatingAssociation.table.c.id,
backref="visualizations")
backref="visualizations"),
average_rating=column_property(
select([func.avg(model.VisualizationRatingAssociation.table.c.rating)]).where(model.VisualizationRatingAssociation.table.c.visualization_id == model.Visualization.table.c.id),
deferred=True
)
))

# Set up proxy so that
Expand Down
7 changes: 6 additions & 1 deletion lib/galaxy/web/framework/helpers/grids.py
Expand Up @@ -573,7 +573,12 @@ class CommunityRatingColumn(GridColumn, UsesItemRatings):
""" Column that displays community ratings for an item. """

def get_value(self, trans, grid, item):
ave_item_rating, num_ratings = self.get_ave_item_rating_data(trans.sa_session, item, webapp_model=trans.model)
if not hasattr(item, "average_rating"):
# No prefetched column property, generate it on the fly.
ave_item_rating, num_ratings = self.get_ave_item_rating_data(trans.sa_session, item, webapp_model=trans.model)
else:
ave_item_rating = item.average_rating
num_ratings = 2 # just used for pluralization
return trans.fill_template("tool_shed_rating.mako",
ave_item_rating=ave_item_rating,
num_ratings=num_ratings,
Expand Down
18 changes: 15 additions & 3 deletions lib/galaxy/webapps/galaxy/controllers/history.py
Expand Up @@ -5,7 +5,7 @@
from six import string_types
from six.moves.urllib.parse import unquote_plus
from sqlalchemy import and_, false, func, null, true
from sqlalchemy.orm import eagerload, eagerload_all
from sqlalchemy.orm import eagerload, eagerload_all, undefer

import galaxy.util
from galaxy import exceptions
Expand Down Expand Up @@ -209,8 +209,20 @@ class NameURLColumn(grids.PublicURLColumn, NameColumn):
operations = []

def build_initial_query(self, trans, **kwargs):
# Join so that searching history.user makes sense.
return trans.sa_session.query(self.model_class).join(model.User.table)
# TODO: Tags are still loaded one at a time; consider loading them all at once:
# - eagerload would keep everything in one query but would multiply the number of rows returned and
# could result in unneeded info being transferred over the wire.
# - subqueryload("tags").subqueryload("tag") would probably be better under PostgreSQL, but I'd
# like some performance data against a big database first, since it might cause problems.

# - Pull down only the username from the associated User table since that is all that is used
# (can be used during search). The join is needed in addition to the eagerload because it is used by
# the .count() query, which doesn't respect the eagerload options (could eliminate this with #5523).
# - Undefer the average_rating column to prevent loading individual ratings per history.
# - Eager load annotations - this causes a left join, which might be inefficient if there were
# potentially many items per history (as when joining HDAs, for instance), but there should be
# at most one, so this is fine.
return trans.sa_session.query(self.model_class).join("user").options(eagerload("user").load_only("username"), eagerload("annotations"), undefer("average_rating"))

def apply_query_filter(self, trans, query, **kwargs):
# A public history is published, has a slug, and is not deleted.
Expand Down
5 changes: 3 additions & 2 deletions lib/galaxy/webapps/galaxy/controllers/page.py
Expand Up @@ -2,6 +2,7 @@

from markupsafe import escape
from sqlalchemy import and_, desc, false, true
from sqlalchemy.orm import eagerload, undefer

from galaxy import managers, model, util, web
from galaxy.model.item_attrs import UsesItemRatings
Expand Down Expand Up @@ -82,8 +83,8 @@ class PageAllPublishedGrid(grids.Grid):
)

def build_initial_query(self, trans, **kwargs):
# Join so that searching history.user makes sense.
return trans.sa_session.query(self.model_class).join(model.User.table)
# See the optimization comments and the TODO about tags on the matching public histories query (controllers/history.py).
return trans.sa_session.query(self.model_class).join("user").options(eagerload("user").load_only("username"), eagerload("annotations"), undefer("average_rating"))

def apply_query_filter(self, trans, query, **kwargs):
return query.filter(self.model_class.deleted == false()).filter(self.model_class.published == true())
Expand Down
5 changes: 3 additions & 2 deletions lib/galaxy/webapps/galaxy/controllers/visualization.py
Expand Up @@ -12,6 +12,7 @@
)
from six import string_types
from sqlalchemy import and_, desc, false, or_, true
from sqlalchemy.orm import eagerload, undefer

from galaxy import managers, model, util, web
from galaxy.datatypes.interval import Bed
Expand Down Expand Up @@ -212,8 +213,8 @@ class VisualizationAllPublishedGrid(grids.Grid):
)

def build_initial_query(self, trans, **kwargs):
# Join so that searching history.user makes sense.
return trans.sa_session.query(self.model_class).join(model.User.table)
# See the optimization comments and the TODO about tags on the matching public histories query (controllers/history.py).
return trans.sa_session.query(self.model_class).join("user").options(eagerload("user").load_only("username"), eagerload("annotations"), undefer("average_rating"))

def apply_query_filter(self, trans, query, **kwargs):
return query.filter(self.model_class.deleted == false()).filter(self.model_class.published == true())
Expand Down
8 changes: 5 additions & 3 deletions lib/galaxy/webapps/galaxy/controllers/workflow.py
Expand Up @@ -10,7 +10,7 @@
from markupsafe import escape
from six.moves.http_client import HTTPConnection
from sqlalchemy import and_
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import eagerload, joinedload, lazyload, undefer
from sqlalchemy.sql import expression

from galaxy import (
Expand Down Expand Up @@ -138,8 +138,10 @@ class StoredWorkflowAllPublishedGrid(grids.Grid):
]

def build_initial_query(self, trans, **kwargs):
# Join so that searching stored_workflow.user makes sense.
return trans.sa_session.query(self.model_class).join(model.User.table)
# See the optimization comments and the TODO about tags on the matching public histories query (controllers/history.py).
# In addition to that, be sure to lazyload the latest_workflow - it isn't needed here, and loading it
# causes all of its steps to be eagerly loaded as well.
return trans.sa_session.query(self.model_class).join("user").options(lazyload("latest_workflow"), eagerload("user").load_only("username"), eagerload("annotations"), undefer("average_rating"))

def apply_query_filter(self, trans, query, **kwargs):
# A public workflow is published, has a slug, and is not deleted.
Expand Down

0 comments on commit ed23cc6

Please sign in to comment.