Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/mavedb/lib/annotation/evidence_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
functional_score_calibration_as_method,
pathogenicity_score_calibration_as_method,
)
from mavedb.lib.annotation.util import serialize_evidence_items
from mavedb.models.mapped_variant import MappedVariant
from mavedb.models.score_calibration import ScoreCalibration

Expand Down Expand Up @@ -65,7 +64,7 @@ def acmg_evidence_line(

return VariantPathogenicityEvidenceLine(
description=f"Pathogenicity evidence line for {mapped_variant.variant.urn}.",
hasEvidenceItems=serialize_evidence_items(evidence),
hasEvidenceItems=list(evidence),
specifiedBy=pathogenicity_score_calibration_as_method(score_calibration, evidence_outcome),
evidenceOutcome={
"primaryCoding": Coding(
Expand Down Expand Up @@ -102,7 +101,7 @@ def functional_evidence_line(

return EvidenceLine(
description=f"Functional evidence line for {mapped_variant.variant.urn}",
hasEvidenceItems=serialize_evidence_items(evidence),
hasEvidenceItems=[StudyResult(root=item) for item in evidence],
directionOfEvidenceProvided=direction_of_support_for_functional_classification(classification),
evidenceOutcome=MappableConcept(
primaryCoding=Coding(
Expand Down
3 changes: 1 addition & 2 deletions src/mavedb/lib/annotation/statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
variant_interpretation_functional_guideline_method,
variant_interpretation_pathogenicity_guideline_method,
)
from mavedb.lib.annotation.util import serialize_evidence_items
from mavedb.models.mapped_variant import MappedVariant
from mavedb.models.score_calibration import ScoreCalibration
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
Expand Down Expand Up @@ -121,5 +120,5 @@ def mapped_variant_to_pathogenicity_statement(
system="ACMG Guidelines, 2015",
),
),
hasEvidenceLines=serialize_evidence_items(evidence),
hasEvidenceLines=list(evidence),
)
51 changes: 21 additions & 30 deletions src/mavedb/lib/annotation/util.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from collections.abc import Sequence
from typing import Any, Literal, Optional
from typing import Literal, Optional

from ga4gh.core.models import Extension
from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided as VaSpecStrengthOfEvidenceProvided
from ga4gh.vrs.models import (
Allele,
CisPhasedBlock,
Expression,
LengthExpression,
LiteralSequenceExpression,
MolecularVariation,
ReferenceLengthExpression,
SequenceLocation,
SequenceReference,
)
Expand All @@ -27,33 +28,6 @@
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification


def serialize_evidence_items(evidence: Sequence[Any]) -> list[dict[str, Any]]:
"""Serialize evidence objects to dictionaries using `model_dump(exclude_none=True)`.

Args:
evidence (Sequence[Any]): Evidence objects expected to provide a
`model_dump` method.

Returns:
list[dict[str, Any]]: Evidence payloads suitable for assignment to
GA4GH VA model fields such as `hasEvidenceItems`.

Raises:
TypeError: If any item does not expose a callable `model_dump` method.
"""

serialized_evidence: list[dict[str, Any]] = []

for evidence_item in evidence:
model_dump = getattr(evidence_item, "model_dump", None)
if not callable(model_dump):
raise TypeError("Evidence items must provide a callable model_dump method.")

serialized_evidence.append(model_dump(exclude_none=True))

return serialized_evidence


def allele_from_mapped_variant_dictionary_result(allelic_mapping_results: dict) -> Allele:
"""
Converts a dictionary containing allelic mapping results into an Allele object.
Expand Down Expand Up @@ -81,9 +55,26 @@ def allele_from_mapped_variant_dictionary_result(allelic_mapping_results: dict)
except KeyError:
variation = allelic_mapping_results

state_dict = variation["state"]
state: ReferenceLengthExpression | LengthExpression | LiteralSequenceExpression
if state_dict.get("type") == "ReferenceLengthExpression":
state = ReferenceLengthExpression(**state_dict)
elif state_dict.get("type") == "LengthExpression":
state = LengthExpression(**state_dict)
elif state_dict.get("type") == "LiteralSequenceExpression":
state = LiteralSequenceExpression(**state_dict)
else:
raise ValueError(
f"Unsupported VRS Allele state type {state_dict.get('type')!r}. "
"Update allele_from_mapped_variant_dictionary_result to handle this type."
)

# Explicit field extraction for alleles is intentional: stored dicts may contain extra fields (e.g. "type" on
# Extension, "label" on SequenceReference) that the strict VRS Pydantic models forbid, so using model_validate()
# directly is not possible against stored mapping results.
return Allele(
id=variation.get("id"),
state=LiteralSequenceExpression(**variation["state"]),
state=state,
digest=variation.get("digest"),
location=SequenceLocation(
start=variation.get("location", {}).get("start"),
Expand Down
39 changes: 39 additions & 0 deletions tests/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,45 @@
"members": [TEST_VALID_POST_MAPPED_VRS_ALLELE, TEST_VALID_POST_MAPPED_VRS_ALLELE],
}

TEST_VALID_POST_MAPPED_VRS_ALLELE_RLE = {
"id": TEST_GA4GH_IDENTIFIER,
"type": "Allele",
"state": {"type": "ReferenceLengthExpression", "length": 5, "repeatSubunitLength": 5},
"digest": TEST_GA4GH_DIGEST,
"location": {
"id": TEST_SEQUENCE_LOCATION_ACCESSION,
"end": 6,
"type": "SequenceLocation",
"start": 5,
"digest": TEST_GA4GH_DIGEST,
"sequenceReference": {
"type": "SequenceReference",
"label": TEST_REFSEQ_IDENTIFIER,
"refgetAccession": TEST_REFGET_ACCESSION,
},
},
"expressions": [{"value": f"{TEST_REFSEQ_IDENTIFIER}:p.=", "syntax": "hgvs.p"}],
}

TEST_VALID_POST_MAPPED_VRS_ALLELE_LENGTH_EXPRESSION = {
"id": TEST_GA4GH_IDENTIFIER,
"type": "Allele",
"state": {"type": "LengthExpression", "length": 5},
"digest": TEST_GA4GH_DIGEST,
"location": {
"id": TEST_SEQUENCE_LOCATION_ACCESSION,
"end": 6,
"type": "SequenceLocation",
"start": 5,
"digest": TEST_GA4GH_DIGEST,
"sequenceReference": {
"type": "SequenceReference",
"label": TEST_REFSEQ_IDENTIFIER,
"refgetAccession": TEST_REFGET_ACCESSION,
},
},
}

TEST_PUBMED_PUBLICATION = {
"identifier": TEST_PUBMED_IDENTIFIER,
"db_name": "PubMed",
Expand Down
21 changes: 21 additions & 0 deletions tests/lib/annotation/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,24 @@ def test_variant_not_in_any_range_returns_uncertain_significance(
assert result.type == "Statement"
# Classification should be UNCERTAIN_SIGNIFICANCE
assert result.classification.primaryCoding.code.root == "uncertain significance"

def test_pathogenicity_evidence_line_has_evidence_items_are_statement_instances(
self, mock_mapped_variant_with_pathogenicity_calibration_score_set
):
"""Regression test: hasEvidenceItems on VariantPathogenicityEvidenceLine must be model instances.

Passing serialized dict representations of Statement objects to hasEvidenceItems caused
VariantPathogenicityEvidenceLine validation to fail when reconstructing nested VRS objects
(e.g. Allele with production genomic coordinates). Model instances must be stored directly.
"""
result = variant_pathogenicity_statement(mock_mapped_variant_with_pathogenicity_calibration_score_set)

assert result is not None
for evidence_line in result.hasEvidenceLines:
assert evidence_line.hasEvidenceItems is not None
for evidence_item in evidence_line.hasEvidenceItems:
# Must be a model instance, not a raw dict
assert not isinstance(
evidence_item, dict
), "hasEvidenceItems contained a raw dict instead of a model instance"
assert evidence_item.type == "Statement"
46 changes: 43 additions & 3 deletions tests/lib/annotation/test_evidence_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@
from ga4gh.va_spec.base.enums import StrengthOfEvidenceProvided

from mavedb.lib.annotation.annotate import variant_study_result
from mavedb.lib.annotation.classification import ExperimentalVariantFunctionalImpactClassification
from mavedb.lib.annotation.evidence_line import acmg_evidence_line, functional_evidence_line
from mavedb.lib.annotation.proposition import mapped_variant_to_experimental_variant_clinical_impact_proposition
from mavedb.lib.annotation.proposition import (
mapped_variant_to_experimental_variant_clinical_impact_proposition,
mapped_variant_to_experimental_variant_functional_impact_proposition,
)
from mavedb.lib.annotation.statement import mapped_variant_to_functional_statement


@pytest.mark.unit
Expand Down Expand Up @@ -59,7 +64,8 @@ def test_acmg_evidence_line_with_met_valid_clinical_classification(
):
proposition = mapped_variant_to_experimental_variant_clinical_impact_proposition(mapped_variant)
study_result = variant_study_result(mapped_variant)
result = acmg_evidence_line(mapped_variant, score_calibration, proposition, [study_result])
evidence = functional_evidence_line(mapped_variant, score_calibration, [study_result])
result = acmg_evidence_line(mapped_variant, score_calibration, proposition, [evidence])

if expected_strength == StrengthOfEvidenceProvided.STRONG:
expected_evidence_outcome = expected_outcome.value
Expand Down Expand Up @@ -96,7 +102,8 @@ def test_acmg_evidence_line_with_not_met_clinical_classification(
):
proposition = mapped_variant_to_experimental_variant_clinical_impact_proposition(mapped_variant)
study_result = variant_study_result(mapped_variant)
result = acmg_evidence_line(mapped_variant, score_calibration, proposition, [study_result])
evidence = functional_evidence_line(mapped_variant, score_calibration, [study_result])
result = acmg_evidence_line(mapped_variant, score_calibration, proposition, [evidence])

assert isinstance(result, VariantPathogenicityEvidenceLine)
assert result.description == f"Pathogenicity evidence line for {mapped_variant.variant.urn}."
Expand All @@ -120,6 +127,39 @@ def test_acmg_evidence_line_with_no_calibrations_raises_error(self, mock_mapped_
study_result = variant_study_result(mock_mapped_variant)
acmg_evidence_line(mock_mapped_variant, score_calibration, proposition, [study_result])

def test_acmg_evidence_line_accepts_statement_evidence_without_serialization_error(
self,
mock_mapped_variant_with_pathogenicity_calibration_score_set,
):
"""Regression test: VariantPathogenicityEvidenceLine must accept Statement instances directly.

Previously, acmg_evidence_line serialized Statement evidence to dicts via model_dump,
which caused VariantPathogenicityEvidenceLine validation to fail when reconstructing
nested VRS objects from those dicts (e.g. Allele with real genomic coordinates).
Evidence model instances must be passed directly, not serialized.
"""
mapped_variant = mock_mapped_variant_with_pathogenicity_calibration_score_set
score_calibration = mapped_variant.variant.score_set.score_calibrations[0]

study_result = variant_study_result(mapped_variant)
functional_proposition = mapped_variant_to_experimental_variant_functional_impact_proposition(mapped_variant)
functional_evidence = functional_evidence_line(mapped_variant, score_calibration, [study_result])
functional_statement = mapped_variant_to_functional_statement(
mapped_variant,
functional_proposition,
[functional_evidence],
score_calibration,
ExperimentalVariantFunctionalImpactClassification.NORMAL,
)
clinical_proposition = mapped_variant_to_experimental_variant_clinical_impact_proposition(mapped_variant)

result = acmg_evidence_line(mapped_variant, score_calibration, clinical_proposition, [functional_statement])

assert isinstance(result, VariantPathogenicityEvidenceLine)
assert len(result.hasEvidenceItems) == 1
# Evidence items must be Statement model instances, not raw dicts
assert result.hasEvidenceItems[0].type == "Statement"


@pytest.mark.unit
class TestFunctionalEvidenceLine:
Expand Down
78 changes: 54 additions & 24 deletions tests/lib/annotation/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@
select_strongest_functional_calibration,
select_strongest_pathogenicity_calibration,
sequence_feature_for_mapped_variant,
serialize_evidence_items,
variation_from_mapped_variant,
vrs_object_from_mapped_variant,
)
from tests.helpers.constants import TEST_SEQUENCE_LOCATION_ACCESSION, TEST_VALID_POST_MAPPED_VRS_ALLELE
from tests.helpers.constants import (
TEST_SEQUENCE_LOCATION_ACCESSION,
TEST_VALID_POST_MAPPED_VRS_ALLELE,
TEST_VALID_POST_MAPPED_VRS_ALLELE_LENGTH_EXPRESSION,
TEST_VALID_POST_MAPPED_VRS_ALLELE_RLE,
)


@pytest.mark.unit
Expand All @@ -55,16 +59,60 @@ def test_variation_from_mapped_variant_no_post_mapped(self, mock_mapped_variant)
with pytest.raises(MappingDataDoesntExistException):
variation_from_mapped_variant(mock_mapped_variant)

def test_vrs_object_from_mapped_variant_handles_haplotype_member_list(self):
mapping_results = {
"type": "Haplotype",
"members": [TEST_VALID_POST_MAPPED_VRS_ALLELE, TEST_VALID_POST_MAPPED_VRS_ALLELE],
@pytest.mark.parametrize(
"allele_dict, expected_state_type, expected_state_fields",
[
(
TEST_VALID_POST_MAPPED_VRS_ALLELE,
"LiteralSequenceExpression",
{"sequence": "F"},
),
(
TEST_VALID_POST_MAPPED_VRS_ALLELE_RLE,
"ReferenceLengthExpression",
{"length": 5, "repeatSubunitLength": 5},
),
(
TEST_VALID_POST_MAPPED_VRS_ALLELE_LENGTH_EXPRESSION,
"LengthExpression",
{"length": 5},
),
],
ids=["lse_state", "rle_state", "length_expression_state"],
)
def test_allele_state_type_is_deserialized_correctly(self, allele_dict, expected_state_type, expected_state_fields):
result = vrs_object_from_mapped_variant(allele_dict).model_dump()

assert result["state"]["type"] == expected_state_type
for k, v in expected_state_fields.items():
assert result["state"][k] == v

def test_unknown_allele_state_type_raises_value_error(self):
allele_dict = {
**TEST_VALID_POST_MAPPED_VRS_ALLELE,
"state": {"type": "UnknownFutureExpression", "someField": "someValue"},
}

with pytest.raises(ValueError, match="Unsupported VRS Allele state type"):
vrs_object_from_mapped_variant(allele_dict)

@pytest.mark.parametrize(
"member_dict, expected_state_type",
[
(TEST_VALID_POST_MAPPED_VRS_ALLELE, "LiteralSequenceExpression"),
(TEST_VALID_POST_MAPPED_VRS_ALLELE_RLE, "ReferenceLengthExpression"),
(TEST_VALID_POST_MAPPED_VRS_ALLELE_LENGTH_EXPRESSION, "LengthExpression"),
],
ids=["lse_members", "rle_members", "length_expression_members"],
)
def test_cis_phased_block_member_state_types_are_deserialized_correctly(self, member_dict, expected_state_type):
mapping_results = {"type": "CisPhasedBlock", "members": [member_dict, member_dict]}

result = vrs_object_from_mapped_variant(mapping_results).model_dump()

assert result["type"] == "CisPhasedBlock"
assert len(result["members"]) == 2
assert result["members"][0]["state"]["type"] == expected_state_type


@pytest.mark.unit
Expand Down Expand Up @@ -523,24 +571,6 @@ def test_sequence_feature_raises_when_target_has_no_name_or_ids(self, mock_mappe
sequence_feature_for_mapped_variant(mock_mapped_variant)


@pytest.mark.unit
class TestSerializeEvidenceItems:
def test_serialize_evidence_items_serializes_all_items_in_order(self):
first_item = SimpleNamespace(model_dump=lambda *, exclude_none: {"id": "first", "exclude_none": exclude_none})
second_item = SimpleNamespace(model_dump=lambda *, exclude_none: {"id": "second", "exclude_none": exclude_none})

result = serialize_evidence_items([first_item, second_item])

assert result == [
{"id": "first", "exclude_none": True},
{"id": "second", "exclude_none": True},
]

def test_serialize_evidence_items_raises_for_non_dumpable_item(self):
with pytest.raises(TypeError, match="model_dump"):
serialize_evidence_items([SimpleNamespace(not_model_dump=True)])


@pytest.mark.integration
class TestAnnotationUtilIntegration:
"""Integration tests for utility helpers using persisted DB-backed variants."""
Expand Down
Loading