Skip to content

Commit

Permalink
Add transcripts, gnomAD constraints and ClinVar reports to the export (
Browse files Browse the repository at this point in the history
#592)

Closes: #568
Related-Issue: #568
Projected-Results-Impact: none

Co-authored-by: Incardona Pietro <incardon@mpi-cbg.de>
  • Loading branch information
incardon and Incardona Pietro committed Jul 26, 2022
1 parent 7e33914 commit 8c3dd2b
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 57 deletions.
1 change: 1 addition & 0 deletions HISTORY.rst
Expand Up @@ -9,6 +9,7 @@ v1.2.2 (anthenea; UNRELEASED)
End-User Summary
================

- Add transcripts, gnomAD constraints and ClinVar reports to the export (#568).
- Extra annotations in export completed and tested (#495).

Full Change List
Expand Down
3 changes: 3 additions & 0 deletions geneinfo/tests/factories.py
Expand Up @@ -176,6 +176,9 @@ class Meta:
obs_lof = factory.Sequence(lambda n: n)
lof_z = factory.Sequence(lambda n: 1 / 2 ** (n % 12))
oe_lof = factory.Sequence(lambda n: 1 / 2 ** (n % 12))
pLI = factory.Sequence(lambda n: 1 / 2 ** (n % 12) + 1.234)
oe_lof_upper = factory.Sequence(lambda n: 1 / 3 ** (n % 12))
oe_lof_lower = factory.Sequence(lambda n: 1 / 0.75 ** (n % 12))


class GeneIdToInheritanceFactory(factory.django.DjangoModelFactory):
Expand Down
23 changes: 22 additions & 1 deletion variants/file_export.py
Expand Up @@ -25,6 +25,7 @@
annotate_with_phenotype_scores,
annotate_with_pathogenicity_scores,
annotate_with_joint_scores,
annotate_with_transcripts,
unroll_extra_annos_result,
prioritize_genes,
VariantScoresFactory,
Expand Down Expand Up @@ -104,6 +105,12 @@ def to_str(val):
("gene_name", "Gene Name", str),
("gene_family", "Gene Family", str),
("pubmed_id", "Gene Pubmed ID", str),
("gnomad_pLI", "Gnomad constrains pLI", float),
("gnomad_mis_z", "Gnomad constrains z-score missense", float),
("gnomad_oe_lof", "Gnomad constrains lof observed/expected", float),
("gnomad_oe_lof_upper", "Gnomad constrains lof observed/expected upper", float),
("gnomad_oe_lof_lower", "Gnomad constrains lof observed/expected lower", float),
("pathogenicity_summary", "ClinVar pathogenicity summary", str),
)
if settings.KIOSK_MODE:
HEADER_FIXED = tuple(filter(lambda x: not x[0].startswith("inhouse_"), HEADER_FIXED))
Expand All @@ -120,6 +127,8 @@ def to_str(val):
("pathogenicity_rank", "Pathogenicity Rank", int),
)

#: Names of the transcript header columns (name, title, type).
HEADERS_TRANSCRIPTS = (("transcripts", "Transcript ids", str),)

#: Names of the joint scoring header columns.
HEADERS_JOINT_SCORES = (
("joint_score", "Pheno+Patho Score", float),
Expand Down Expand Up @@ -230,6 +239,11 @@ def __getitem__(self, key):
return self.__wrapped__.__getitem__(key)


def _is_jannovar_enabled():
    """Tell whether Jannovar is enabled, i.e., whether transcripts can be exported.

    :return: The value of ``settings.VARFISH_ENABLE_JANNOVAR``.
    """
    jannovar_enabled = settings.VARFISH_ENABLE_JANNOVAR
    return jannovar_enabled


class CaseExporterBase:
"""Base class for export of (filtered) case data from single case or all cases of a project.
"""
Expand Down Expand Up @@ -339,6 +353,8 @@ def _yield_columns(self, members):
header += HEADERS_PHENO_SCORES
if self._is_pathogenicity_enabled():
header += HEADERS_PATHO_SCORES
if _is_jannovar_enabled():
header += HEADERS_TRANSCRIPTS
if self._is_prioritization_enabled() and self._is_pathogenicity_enabled():
header += HEADERS_JOINT_SCORES
if self.query_args["export_flags"]:
Expand All @@ -364,6 +380,8 @@ def _yield_smallvars(self):
with contextlib.closing(self.query.run(self.query_args)) as result:
self.job.add_log_entry("Executing phenotype score query...")
_result = list(result)
if _is_jannovar_enabled():
_result = annotate_with_transcripts(_result, self.query_args["database_select"])
if self._is_prioritization_enabled():
gene_scores = self._fetch_gene_scores([entry.entrez_id for entry in _result])
_result = annotate_with_phenotype_scores(_result, gene_scores)
Expand Down Expand Up @@ -536,7 +554,10 @@ def _write_variants_data(self):
if column["name"] == "chromosome":
row.append("chr" + getattr(small_var, "chromosome"))
elif column["fixed"]:
row.append(getattr(small_var, column["name"]))
if column["name"] == "transcripts":
row.append(getattr(small_var, column["name"]).replace("\n", "|"))
else:
row.append(getattr(small_var, column["name"]))
else:
member, field = column["name"].rsplit(".", 1)
if field == "aaf":
Expand Down
91 changes: 90 additions & 1 deletion variants/models.py
Expand Up @@ -60,7 +60,6 @@
from variants.helpers import get_meta
from projectroles.app_settings import AppSettingAPI


app_settings = AppSettingAPI()

#: Django user model.
Expand All @@ -87,6 +86,41 @@
}


def load_molecular_impact(kwargs, timeout=60):
    """Load molecular impact from the Jannovar REST API if configured.

    :param kwargs: Mapping-like object providing the keys ``release``, ``database``,
        ``chromosome``, ``start``, ``reference`` and ``alternative`` of the variant.
    :param timeout: Seconds to wait for the server; forwarded to ``requests`` so that an
        unresponsive server cannot block the caller indefinitely.
    :return: Empty list if ``settings.VARFISH_ENABLE_JANNOVAR`` is falsy, otherwise the
        decoded JSON body of the server response.
    :raises ConnectionError: If the server cannot be reached, does not answer within
        ``timeout`` seconds, or responds with a status code other than 200.
    """
    if not settings.VARFISH_ENABLE_JANNOVAR:
        return []

    url_tpl = (
        "%(base_url)sannotate-var/%(database)s/%(genome)s/%(chromosome)s/%(position)s/%(reference)s/"
        "%(alternative)s"
    )
    # Any release other than GRCh37/GRCh38 falls back to hg19.
    genome = {"GRCh37": "hg19", "GRCh38": "hg38"}.get(kwargs["release"], "hg19")
    url = url_tpl % {
        "base_url": settings.VARFISH_JANNOVAR_REST_API_URL,
        "database": kwargs["database"],
        "genome": genome,
        "chromosome": kwargs["chromosome"],
        "position": kwargs["start"],
        "reference": kwargs["reference"],
        "alternative": kwargs["alternative"],
    }
    try:
        res = requests.request(method="get", url=url, timeout=timeout)
    except (requests.ConnectionError, requests.Timeout) as e:
        # Report timeouts the same way as an unreachable server so callers keep
        # dealing with a single exception type.
        raise ConnectionError(
            "ERROR: Server at {} not responding.".format(settings.VARFISH_JANNOVAR_REST_API_URL)
        ) from e

    if res.status_code != 200:
        raise ConnectionError(
            "ERROR: Server responded with status {} and message {}".format(
                res.status_code, res.text
            )
        )
    return res.json()


def only_source_name(full_name):
"""Helper function that strips SNAPPY suffixes for samples."""
if full_name.count("-") >= 3:
Expand Down Expand Up @@ -2237,6 +2271,38 @@ def __getitem__(self, key):
return self.__wrapped__.__getitem__(key)


class RowWithTranscripts(wrapt.ObjectProxy):
    """Wrap a result row and add members for the transcripts and the transcript database.

    ``transcripts`` starts out as ``None`` and can be assigned; ``database`` is the value
    given at construction and is read-only.  Both are reachable through attribute access
    as well as item access; every other key is looked up in the wrapped row.
    """

    def __init__(self, obj, database):
        super().__init__(obj)
        self._self_database = database
        self._self_transcripts = None

    @property
    def database(self):
        return self._self_database

    @property
    def transcripts(self):
        return self._self_transcripts

    @transcripts.setter
    def transcripts(self, value):
        self._self_transcripts = value

    def __getitem__(self, key):
        # The two members added by this wrapper take precedence over the wrapped row.
        if key == "database":
            return self.database
        if key == "transcripts":
            return self.transcripts
        return self.__wrapped__.__getitem__(key)

    def __getattr__(self, item):
        # Attribute access is routed through item access.
        return self.__getitem__(item)


def annotate_with_phenotype_scores(rows, gene_scores):
"""Annotate the results in ``rows`` with phenotype scores stored in ``small_variant_query``.
Expand Down Expand Up @@ -2264,6 +2330,29 @@ def annotate_with_phenotype_scores(rows, gene_scores):
return rows


def annotate_with_transcripts(rows, database):
    """Annotate the results in ``rows`` with the transcripts found for ``database``.

    Each row is wrapped in a ``RowWithTranscripts`` whose ``transcripts`` member is set to
    one line per entry returned by ``load_molecular_impact()``.  A line has the form
    ``<transcriptId>;<effect>[,<effect>...];<hgvsProtein>;<hgvsNucleotides>``.

    :return: List with the wrapped rows.
    """

    def _format_transcript(transcript):
        """Render one transcript entry as a semicolon-separated line."""
        fields = (
            transcript["transcriptId"],
            ",".join(transcript["variantEffects"]),
            transcript["hgvsProtein"],
            transcript["hgvsNucleotides"],
        )
        return ";".join(fields)

    # Wrap all rows first, then query the transcripts row by row.
    wrapped_rows = [RowWithTranscripts(plain_row, database) for plain_row in rows]
    for wrapped in wrapped_rows:
        impacts = load_molecular_impact(wrapped)
        wrapped.transcripts = "\n".join([_format_transcript(impact) for impact in impacts])

    return wrapped_rows


# TODO: Improve wrapper so we can assign obj.pathogenicity_rank and score
class RowWithPathogenicityScore(wrapt.ObjectProxy):
"""Wrap a result row and add members for pathogenicity score and rank."""
Expand Down
14 changes: 13 additions & 1 deletion variants/queries.py
Expand Up @@ -1344,7 +1344,17 @@ def extend_selectable(self, query_parts):
class ExtendQueryPartsGnomadConstraintsJoin(ExtendQueryPartsBase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.fields = ["pLI", "mis_z", "syn_z", "oe_lof", "oe_lof_upper", "oe_lof_lower"]
self.fields = [
"pLI",
"mis_z",
"syn_z",
"oe_mis",
"oe_mis_upper",
"oe_mis_lower",
"oe_lof",
"oe_lof_upper",
"oe_lof_lower",
]
self.subquery = (
select(
[
Expand Down Expand Up @@ -1478,6 +1488,7 @@ class CaseExportTableQueryPartsBuilder(QueryPartsBuilder):
ExtendQueryPartsHgncAndConservationJoin,
ExtendQueryPartsAcmgJoin,
ExtendQueryPartsMgiJoin,
ExtendQueryPartsGnomadConstraintsJoin,
]


Expand Down Expand Up @@ -1518,6 +1529,7 @@ class ProjectExportTableQueryPartsBuilder(QueryPartsBuilder):
ExtendQueryPartsGeneSymbolJoin,
ExtendQueryPartsAcmgJoin,
ExtendQueryPartsMgiJoin,
ExtendQueryPartsGnomadConstraintsJoin,
]


Expand Down
3 changes: 2 additions & 1 deletion variants/tests/factories.py
Expand Up @@ -655,6 +655,7 @@ class Meta:
state = "active"


#: Chromosome names without ``chr`` prefix: "1" to "22", "X" and "Y".
CHROMOSOME_LIST_TESTING = [str(chrom) for chrom in list(range(1, 23)) + ["X", "Y"]]
#: Maps chromosome name to its 1-based number ("1" -> 1, ..., "X" -> 23, "Y" -> 24).
CHROMOSOME_MAPPING = {str(chrom): i + 1 for i, chrom in enumerate(list(range(1, 23)) + ["X", "Y"])}
# Register the ``chr``-prefixed spelling of every name with the same number.
CHROMOSOME_MAPPING.update({f"chr{chrom}": i for chrom, i in CHROMOSOME_MAPPING.items()})

Expand All @@ -671,7 +672,7 @@ class Params:
genotypes = default_genotypes

release = "GRCh37"
chromosome = factory.Iterator(list(CHROMOSOME_MAPPING.keys()))
chromosome = factory.Iterator(CHROMOSOME_LIST_TESTING)
chromosome_no = factory.LazyAttribute(lambda o: CHROMOSOME_MAPPING[o.chromosome])
start = factory.Sequence(lambda n: (n + 1) * 100)
end = factory.LazyAttribute(lambda o: o.start + len(o.reference) - len(o.alternative))
Expand Down

0 comments on commit 8c3dd2b

Please sign in to comment.