Skip to content

Commit

Permalink
Improve performance of queries for unseen submissions
Browse files Browse the repository at this point in the history
With large numbers of sources or submissions, the journalist interface
index page queries could exceed the 60-second Apache timeout. This
uses joins instead of the n+1 checks for submission seen status, both
there and when downloading unread submissions.
  • Loading branch information
rmol committed Mar 16, 2021
1 parent f81b937 commit d1e5951
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 40 deletions.
96 changes: 66 additions & 30 deletions securedrop/journalist_app/main.py
Expand Up @@ -8,6 +8,8 @@
from flask import (Blueprint, request, current_app, session, url_for, redirect,
render_template, g, flash, abort, Markup, escape)
from flask_babel import gettext
from sqlalchemy.orm import joinedload
from sqlalchemy.sql import func

import store

Expand Down Expand Up @@ -61,31 +63,60 @@ def select_logo() -> werkzeug.Response:
else:
return redirect(url_for('static', filename='i/logo.png'))

@view.route('/')
@view.route("/")
def index() -> str:
unstarred = []
starred = []

# Long SQLAlchemy statements look best when formatted according to
# the Pocoo style guide, IMHO:
# http://www.pocoo.org/internal/styleguide/
sources = Source.query.filter_by(pending=False, deleted_at=None) \
.filter(Source.last_updated.isnot(None)) \
.order_by(Source.last_updated.desc()) \
.all()
for source in sources:
star = SourceStar.query.filter_by(source_id=source.id).first()
if star and star.starred:
starred.append(source)
else:
unstarred.append(source)
submissions = Submission.query.filter_by(source_id=source.id).all()
unseen_submissions = [s for s in submissions if not s.seen]
source.num_unread = len(unseen_submissions)

return render_template('index.html',
unstarred=unstarred,
starred=starred)
# Gather the count of unread submissions for each source
# ID. This query will be joined in the queries for starred and
# unstarred sources below, and the unread counts added to
# their result sets as an extra column.
unread_stmt = (
db.session.query(Submission.source_id, func.count("*").label("num_unread"))
.filter_by(seen_files=None, seen_messages=None)
.group_by(Submission.source_id)
.subquery()
)

# Query for starred sources, along with their unread
# submission counts.
starred = (
db.session.query(Source, unread_stmt.c.num_unread)
.filter_by(pending=False, deleted_at=None)
.filter(Source.last_updated.isnot(None))
.filter(SourceStar.starred == True) # noqa: E712
.outerjoin(SourceStar)
.options(joinedload(Source.submissions))
.options(joinedload(Source.star))
.outerjoin(unread_stmt, Source.id == unread_stmt.c.source_id)
.order_by(Source.last_updated.desc())
.all()
)

# Now, add "num_unread" attributes to the source entities.
for source, num_unread in starred:
source.num_unread = num_unread or 0
starred = [source for source, num_unread in starred]

# Query for sources without stars, along with their unread
# submission counts.
unstarred = (
db.session.query(Source, unread_stmt.c.num_unread)
.filter_by(pending=False, deleted_at=None)
.filter(Source.last_updated.isnot(None))
.filter(~Source.star.has(SourceStar.starred == True)) # noqa: E712
.options(joinedload(Source.submissions))
.options(joinedload(Source.star))
.outerjoin(unread_stmt, Source.id == unread_stmt.c.source_id)
.order_by(Source.last_updated.desc())
.all()
)

# Again, add "num_unread" attributes to the source entities.
for source, num_unread in unstarred:
source.num_unread = num_unread or 0
unstarred = [source for source, num_unread in unstarred]

response = render_template("index.html", unstarred=unstarred, starred=starred)
return response

@view.route('/reply', methods=('POST',))
def reply() -> werkzeug.Response:
Expand Down Expand Up @@ -200,12 +231,17 @@ def bulk() -> Union[str, werkzeug.Response]:

@view.route('/download_unread/<filesystem_id>')
def download_unread_filesystem_id(filesystem_id: str) -> werkzeug.Response:
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.filter_by(deleted_at=None).one().id
submissions = Submission.query.filter(Submission.source_id == id).all()
unseen_submissions = [s for s in submissions if not s.seen]
if unseen_submissions == []:
flash(gettext("No unread submissions for this source."))
unseen_submissions = (
Submission.query.join(Source)
.filter(
Source.deleted_at == None, # noqa: E711
Source.filesystem_id == filesystem_id
)
.filter(Submission.seen_files == None, Submission.seen_messages == None)
.all()
)
if len(unseen_submissions) == 0:
flash(gettext("No unread submissions for this source."), "error")
return redirect(url_for('col.col', filesystem_id=filesystem_id))
source = get_source(filesystem_id)
return download(source.journalist_filename, unseen_submissions)
Expand Down
19 changes: 9 additions & 10 deletions securedrop/journalist_app/utils.py
Expand Up @@ -449,18 +449,17 @@ def col_download_unread(cols_selected: List[str]) -> werkzeug.Response:
"""
Download all unseen submissions from all selected sources.
"""
unseen_submissions = [] # type: List[Union[Source, Submission]]

for filesystem_id in cols_selected:
source = (
Source.query.filter(Source.filesystem_id == filesystem_id)
.filter_by(deleted_at=None)
.one()
unseen_submissions = (
Submission.query.join(Source)
.filter(
Source.deleted_at == None, # noqa: E711
Source.filesystem_id.in_(cols_selected)
)
submissions = Submission.query.filter_by(source_id=source.id).all()
unseen_submissions += [s for s in submissions if not s.seen]
.filter(Submission.seen_files == None, Submission.seen_messages == None)
.all()
)

if not unseen_submissions:
if len(unseen_submissions) == 0:
flash(gettext("No unread submissions in selected collections."), "error")
return redirect(url_for("main.index"))

Expand Down

0 comments on commit d1e5951

Please sign in to comment.