Skip to content

Commit

Permalink
Splitting UNION queries in joint mode (#241)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Mar 4, 2022
1 parent 5e04b4d commit 1513b1f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 24 deletions.
3 changes: 3 additions & 0 deletions HISTORY.rst
Expand Up @@ -48,6 +48,7 @@ End-User Summary
- Fixing problem with ACMG classification where VUS-3 was given but should be LB-2 (#359)
- Adding REST API for creating small variant queries (#332)
- Fixing beaconsite queries with dots in the key id (#369)
- Allowing joint queries of larger cohorts (#241)

Full Change List
================
Expand Down Expand Up @@ -106,6 +107,8 @@ Full Change List
- Adding REST API for creating small variant queries (#332)
- Upgrading sodar-core dependency to 0.10.10
- Fixing beaconsite queries with dots in the key id (#369)
- Allowing joint queries of larger cohorts (#241).
This is achieved by splitting the joint query into several smaller UNION queries, each covering at most ``VARFISH_QUERY_MAX_UNION`` cases (default: ``20``).

-------
v0.23.9
Expand Down
3 changes: 3 additions & 0 deletions config/settings/base.py
Expand Up @@ -502,6 +502,9 @@ def set_logging(level=None):
"FIELD_ENCRYPTION_KEY", "_XRAzgLd6NHj8G4q9FNV0p3Um9g4hy8BPBN-AL0JWO0="
)

# Maximal number of cases to combine into one UNION query for joint queries.
QUERY_MAX_UNION = env.int("VARFISH_QUERY_MAX_UNION", 20)

# Varfish: Exomiser
# ------------------------------------------------------------------------------

Expand Down
5 changes: 4 additions & 1 deletion docs_manual/admin_config.rst
Expand Up @@ -185,11 +185,14 @@ In the default ``docker-compose`` setup, postgres server is thus not exposed to
Miscellaneous Configuration
---------------------------

``VARFISH_LOGIN_PAGE_TEXT=``
``VARFISH_LOGIN_PAGE_TEXT``
Text to display on the login page.
``FIELD_ENCRYPTION_KEY``
Key to use for encrypting secrets in the database (such as saved public keys for the Beacon Site feature).
You can generate such a key with the following command: ``python -c 'import os, base64; print(base64.urlsafe_b64encode(os.urandom(32)))'``.
``VARFISH_QUERY_MAX_UNION``
Maximal number of cases to query at the same time (in one UNION statement) for joint queries.
Default is ``20``.

--------------------
Sentry Configuration
Expand Down
70 changes: 47 additions & 23 deletions variants/queries.py
@@ -1,3 +1,4 @@
import contextlib
from itertools import chain
import typing

Expand Down Expand Up @@ -1598,6 +1599,20 @@ def to_stmt(self, kwargs, order_by=None):
return union(comphet_stmt, default_stmt).order_by(*(order_by or []))


def _chunked(arr, max_size):
chunks = []
i = 0
while i < len(arr):
chunks.append(arr[i : i + max_size])
i += max_size
return chunks


class _ClosingWrapper(list):
def closing(self):
pass


class CasePrefetchQuery:
builder = QueryPartsBuilder

Expand All @@ -1618,30 +1633,39 @@ def run(self, kwargs):
column("alternative"),
column("family_name"),
]
stmts = []
for case in self.cases:
comp_het_index = kwargs.get("compound_recessive_indices", {}).get(case.name)
recessive_index = kwargs.get("recessive_indices", {}).get(case.name)
if comp_het_index and self.query_id is None:
# Set the current compound recessive index
kwargs["compound_recessive_index"] = comp_het_index
combiner = CompHetCombiner(case, self.builder)
elif recessive_index and self.query_id is None:
# Set the current compound recessive index
kwargs["compound_recessive_index"] = recessive_index
combiner = RecessiveCombiner(case, self.builder)
else: # compound recessive not in kwargs or disabled
combiner = DefaultCombiner(case, self.builder, self.query_id)
stmts.append(combiner.to_stmt(kwargs))
stmt = union(*stmts).order_by(*order_by)
if settings.DEBUG:
print(
"\n"
+ sqlparse.format(
stmt.compile(self.engine).string, reindent=True, keyword_case="upper"
result = []
chunks = _chunked(self.cases, settings.QUERY_MAX_UNION)
for chunk in chunks:
stmts = []
for case in chunk:
comp_het_index = kwargs.get("compound_recessive_indices", {}).get(case.name)
recessive_index = kwargs.get("recessive_indices", {}).get(case.name)
if comp_het_index and self.query_id is None:
# Set the current compound recessive index
kwargs["compound_recessive_index"] = comp_het_index
combiner = CompHetCombiner(case, self.builder)
elif recessive_index and self.query_id is None:
# Set the current compound recessive index
kwargs["compound_recessive_index"] = recessive_index
combiner = RecessiveCombiner(case, self.builder)
else: # compound recessive not in kwargs or disabled
combiner = DefaultCombiner(case, self.builder, self.query_id)
stmts.append(combiner.to_stmt(kwargs))
stmt = union(*stmts).order_by(*order_by)
if settings.DEBUG:
print(
"\n"
+ sqlparse.format(
stmt.compile(self.engine).string, reindent=True, keyword_case="upper"
)
)
)
return self.engine.execute(stmt)
query_res = self.engine.execute(stmt)
if len(chunks) == 1:
return query_res
else:
with contextlib.closing(query_res) as query_res:
result += list(query_res)
return _ClosingWrapper(result)


class CaseLoadPrefetchedQuery(CasePrefetchQuery):
Expand Down

0 comments on commit 1513b1f

Please sign in to comment.