Skip to content

Commit

Permalink
Fixed recompute of variant stats of large small variant sets. (#234) (#…
Browse files Browse the repository at this point in the history
…256)

Added index for ``SmallVariant`` model filtering for ``case_id`` and ``set_id``.
  • Loading branch information
stolpeo committed Nov 29, 2021
1 parent 75dd9fd commit cf0909d
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 6 deletions.
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ End-User Summary
- Fixing broken flags & comments popup for structural variants.
- Fixing broken search field.
- Extended manual for bug report workflow.
- Fixed recomputation of variant statistics for large small-variant sets.
- Added index to the ``SmallVariant`` model for filtering by ``case_id`` and ``set_id``.
  Creating this index may take a while!

Full Change List
================
Expand All @@ -27,6 +30,9 @@ Full Change List
- Fixing broken flags & comments popup for structural variants.
- Fixing broken search field.
- Extended manual for bug report workflow.
- Fixed recomputation of variant statistics for large small-variant sets.
- Added index to the ``SmallVariant`` model for filtering by ``case_id`` and ``set_id``.
  Creating this index may take a while!

-------
v0.23.9
Expand Down
19 changes: 14 additions & 5 deletions importer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,22 +390,29 @@ def run(self):
self.import_info.save()
self._purge_variant_set(variant_set, table_names)
raise RuntimeError("Problem during variant import ") from e

self.import_job.add_log_entry("Activating variant set ...")

with transaction.atomic():
self.import_job.add_log_entry("Activating variant set ...")
variant_set.state = "active"
variant_set.save()
if not case_created: # Case needs to be updated.
self.case.index = self.import_info.index
self.case.pedigree = self.import_info.pedigree
setattr(self.case, latest_set, variant_set)
self.case.save()
self.import_job.add_log_entry(
"Updating variant counts for variant type %s" % variant_set_info.variant_type
)

self.import_job.add_log_entry(
"Updating variant counts for variant type %s" % variant_set_info.variant_type
)

with transaction.atomic():
update_variant_counts(
self.case, variant_set_info.variant_type, logger=self.import_job.add_log_entry
)
self._post_import(variant_set, variant_set_info.variant_type)

self._post_import(variant_set, variant_set_info.variant_type)

if variant_set.state == "active":
self._clear_old_variant_sets(variant_set, table_names)
variant_set_info.state = VariantSetImportState.IMPORTED.value
Expand Down Expand Up @@ -559,7 +566,9 @@ def _rebuild_small_variants_stats(self, variant_set):

before = timezone.now()
self.import_job.add_log_entry("Computing variant statistics...")

rebuild_case_variant_stats(get_engine(), variant_set, logger=self.import_job.add_log_entry)

elapsed = timezone.now() - before
self.import_job.add_log_entry(
"Finished computing variant statistics in %.2f s" % elapsed.total_seconds()
Expand Down
83 changes: 83 additions & 0 deletions variants/migrations/0083_auto_20211129_1443.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Generated by Django 3.2.9 on 2021-11-29 14:43

from django.db import migrations
import varfish.utils


def _alter_json_field(model_name, name, **field_kwargs):
    """Return an ``AlterField`` operation that sets a field to ``varfish.utils.JSONField``.

    :param model_name: Lower-case model name, as used by the migration framework.
    :param name: Name of the field to alter on that model.
    :param field_kwargs: Keyword arguments passed through unchanged to ``JSONField``
        (e.g. ``help_text`` or ``default``).
    """
    return migrations.AlterField(
        model_name=model_name, name=name, field=varfish.utils.JSONField(**field_kwargs),
    )


# Help texts shared by several of the altered fields.
_QUERY_ARGS_HELP = "(Validated) query parameters"
_QUERY_SETTINGS_HELP = "The query settings"


class Migration(migrations.Migration):
    """Alter the JSON fields of the ``variants`` app to ``varfish.utils.JSONField``.

    NOTE(review): every operation here is an ``AlterField``; no operation that creates
    an index on ``SmallVariant`` (``case_id``, ``set_id``) is part of this migration.
    Confirm where that index is actually created.
    """

    dependencies = [("variants", "0082_auto_20210617_1409")]

    # The order of operations matches the originally generated migration.
    operations = [
        _alter_json_field("caddpathogenicityscorecache", "info"),
        _alter_json_field("caddsubmissionbgjob", "query_args", help_text=_QUERY_ARGS_HELP),
        _alter_json_field("case", "pedigree"),
        _alter_json_field("casealignmentstats", "bam_stats", default=dict),
        _alter_json_field("distillersubmissionbgjob", "query_args", help_text=_QUERY_ARGS_HELP),
        _alter_json_field("exportfilebgjob", "query_args", help_text=_QUERY_ARGS_HELP),
        _alter_json_field(
            "exportprojectcasesfilebgjob", "query_args", help_text=_QUERY_ARGS_HELP
        ),
        _alter_json_field(
            "projectcasessmallvariantquery", "query_settings", help_text=_QUERY_SETTINGS_HELP
        ),
        _alter_json_field("projectcasessmallvariantqueryvariantscores", "info", default=dict),
        _alter_json_field("samplevariantstatistics", "ontarget_dps"),
        _alter_json_field("samplevariantstatistics", "ontarget_effect_counts"),
        _alter_json_field("samplevariantstatistics", "ontarget_indel_sizes"),
        _alter_json_field("smallvariantquery", "query_settings", help_text=_QUERY_SETTINGS_HELP),
        _alter_json_field("smallvariantqueryvariantscores", "info", default=dict),
        _alter_json_field("spanrsubmissionbgjob", "query_args", help_text=_QUERY_ARGS_HELP),
    ]
2 changes: 2 additions & 0 deletions variants/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ class Meta:
models.Index(fields=["case_id", "refseq_gene_id"]),
# For mitochondrial frequency join
models.Index(fields=["case_id", "chromosome_no"]),
# For selecting all variants of a set of a case (used for gathering variant stats).
models.Index(fields=["case_id", "set_id"]),
]
managed = settings.IS_TESTING
db_table = "variants_smallvariant"
Expand Down
2 changes: 1 addition & 1 deletion variants/variant_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def gather_variant_stats(variant_set):
ignore_set = set(IGNORE_EFFECTS)
for small_var in SmallVariant.objects.filter(
set_id=variant_set.pk, case_id=variant_set.case.pk
):
).iterator():
if not (set(small_var.ensembl_effect) & ignore_set):
for sample in samples:
if small_var.genotype[sample]["gt"].count("1") == 1:
Expand Down

0 comments on commit cf0909d

Please sign in to comment.