# Validation

An analysis of how the ClinVar Submission API validates payloads and what its error responses look like.

In [1]:
import json
import os
import sys
from pathlib import Path
from copy import deepcopy

# Put the parent directory on sys.path (once) so that the local ``utils``
# module imported below can be resolved.
module_path = os.path.abspath("../")
if module_path not in sys.path:
    sys.path.append(module_path)
from utils import dry_run_test_api  # noqa: E402

In [2]:
# Load the oncogenicity fixture. JSON is UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform default.
with Path("civic/assertion_onco.json").open(encoding="utf-8") as f:
    onco_data = json.load(f)

# Baseline submission; each cell below mutates a deep copy so this stays pristine.
onco_sub = onco_data["oncogenicitySubmission"][0]
onco_sub_cpy = deepcopy(onco_sub)

## Errors

In [3]:
# Send the bare submission object, i.e. without wrapping it in the
# top-level "oncogenicitySubmission" list.
response = dry_run_test_api(onco_sub_cpy)
response.json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "Additional properties are not allowed ('conditionSet', 'localID', 'localKey', 'observedIn', 'oncogenicityClassification', 'recordStatus', 'variantSet' were unexpected)",
   'code': None,
   'identifier': None}]}

In [4]:
# Add keys that the submission schema does not define.
onco_sub_cpy = deepcopy(onco_sub)
onco_sub_cpy.update(
    {
        "test": 1,
        "will-this-break": "yes",
        "identifier": {"db": "name"},
    }
)

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "Unevaluated properties are not allowed ('identifier', 'test', 'will-this-break' were unexpected)",
   'code': None,
   'identifier': None}]}

In [5]:
# Not including required properties.
#
# NOTE: onco_sub_cpy is not reset in this cell, so it still carries the extra
# keys ("test", "will-this-break", "identifier") added by the previous cell.
# That is why the response also reports unevaluated properties in addition to
# the missing required ones.
for required_key in ("recordStatus", "observedIn", "conditionSet"):
    # pop() with a default keeps the cell re-runnable; `del` would raise
    # KeyError the second time the cell is executed.
    onco_sub_cpy.pop(required_key, None)

dry_run_test_api({"oncogenicitySubmission": [onco_sub_cpy]}).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "'recordStatus' is a required property",
   'code': None,
   'identifier': None},
  {'message': "'observedIn' is a required property",
   'code': None,
   'identifier': None},
  {'message': "'conditionSet' is a required property",
   'code': None,
   'identifier': None},
  {'message': "Unevaluated properties are not allowed ('identifier', 'test', 'will-this-break' were unexpected)",
   'code': None,
   'identifier': None}]}

In [6]:
# Wrong JSON types: "gene" given as an object rather than an array, and a
# citation "id" given as an integer rather than a string.
onco_sub_cpy = deepcopy(onco_sub)
first_variant = onco_sub_cpy["variantSet"]["variant"][0]
first_variant["gene"] = {"symbol": "FGR3"}
classification = onco_sub_cpy["oncogenicityClassification"]
classification["citation"] = [{"db": "PubMed", "id": 27998968}]

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "{'symbol': 'FGR3'} is not of type 'array'",
   'code': None,
   'identifier': None},
  {'message': "27998968 is not of type 'string'",
   'code': None,
   'identifier': None}]}

In [7]:
# Enumerated values are case-sensitive: lower-case "oncogenic" is rejected.
onco_sub_cpy = deepcopy(onco_sub)
classification = onco_sub_cpy["oncogenicityClassification"]
classification["oncogenicityClassificationDescription"] = "oncogenic"

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "'oncogenic' is not one of ['Oncogenic', 'Likely oncogenic', 'Uncertain significance', 'Likely benign', 'Benign']",
   'code': None,
   'identifier': None}]}

In [8]:
# Replace the variant list with a bare string instead of a variant object
# (so neither "hgvs" nor "chromosomeCoordinates" is supplied).
onco_sub_cpy = deepcopy(onco_sub)
variant_set = onco_sub_cpy["variantSet"]
variant_set["variant"] = ["gene"]

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "'gene' is not of type 'object'",
   'code': None,
   'identifier': None},
  {'message': "'gene' is valid under each of {'required': ['hgvs']}, {'required': ['chromosomeCoordinates']}",
   'code': None,
   'identifier': None}]}

In [9]:
# A gene entry can't include both "symbol" and "id".
onco_sub_cpy = deepcopy(onco_sub)
gene_with_both = {"symbol": "EGFR", "id": 673}
onco_sub_cpy["variantSet"]["variant"][0]["gene"] = [gene_with_both]

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "{'symbol': 'EGFR', 'id': 673} is valid under each of {'required': ['symbol']}, {'required': ['id']}",
   'code': None,
   'identifier': None}]}

In [10]:
# Append a citation entry whose shape matches none of the allowed citation schemas.
onco_sub_cpy = deepcopy(onco_sub)
bad_citation = {"myCitation": [{"id": 1}]}
citations = onco_sub_cpy["oncogenicityClassification"]["citation"]
citations.append(bad_citation)

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).json()

{'message': 'Validation failed, see errors for detailed description',
 'errors': [{'message': "Additional properties are not allowed ('myCitation' was unexpected)",
   'code': None,
   'identifier': None},
  {'message': "{'myCitation': [{'id': 1}]} is not valid under any of the given schemas",
   'code': None,
   'identifier': None}]}

## No Data Validation

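The ClinVar Submission API doesn't appear to validate the data itself, only its structure: it does not check that gene/disease/therapy/variant concepts are actual concepts. Each dry-run request below returns HTTP 204 instead of a validation error.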

In [11]:
# Use a gene symbol that is not a real gene.
onco_sub_cpy = deepcopy(onco_sub)
first_variant = onco_sub_cpy["variantSet"]["variant"][0]
first_variant["gene"] = [{"symbol": "dummy gene"}]

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [12]:
# The hgvs field should hold a c./g. expression, yet a protein-level (p.)
# expression is accepted.
onco_sub_cpy = deepcopy(onco_sub)
first_variant = onco_sub_cpy["variantSet"]["variant"][0]
first_variant["hgvs"] = "NP_004324.2:p.Val600Glu"

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [13]:
# Supply a string that is not an HGVS expression at all.
onco_sub_cpy = deepcopy(onco_sub)
first_variant = onco_sub_cpy["variantSet"]["variant"][0]
first_variant["hgvs"] = "7-140453136-A-T"

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [14]:
# Use a condition name that is not a real condition.
onco_sub_cpy = deepcopy(onco_sub)
first_condition = onco_sub_cpy["conditionSet"]["condition"][0]
first_condition["name"] = "invalid"

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [15]:
# Replace the first condition with a MONDO reference whose id is nonsense.
onco_sub_cpy = deepcopy(onco_sub)
conditions = onco_sub_cpy["conditionSet"]["condition"]
conditions[0] = {"db": "MONDO", "id": "asdf"}

payload = {"oncogenicitySubmission": [onco_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [16]:
# Load the therapeutic-evidence fixture. JSON is UTF-8 by specification, so
# the encoding is given explicitly instead of relying on the platform default.
with Path("civic/therapeutic_evidence.json").open(encoding="utf-8") as f:
    tr_data = json.load(f)

# Baseline clinical-impact submission that the following cells copy and mutate.
tr_sub = tr_data["clinicalImpactSubmission"][0]

In [17]:
# Separate the drug names with "|" rather than the expected semicolon.
tr_sub_cpy = deepcopy(tr_sub)
classification = tr_sub_cpy["clinicalImpactClassification"]
classification["drugForTherapeuticAssertion"] = "afatinib|cisplatin"

payload = {"clinicalImpactSubmission": [tr_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [18]:
# Use a drug name that is not a real drug.
tr_sub_cpy = deepcopy(tr_sub)
classification = tr_sub_cpy["clinicalImpactClassification"]
classification["drugForTherapeuticAssertion"] = "dummy drug"

payload = {"clinicalImpactSubmission": [tr_sub_cpy]}
dry_run_test_api(payload).status_code

204

In [19]:
# Using drug in prognostic data: attach a therapeutic-assertion drug to a
# prognostic submission.
# The fixture is opened as UTF-8 explicitly (JSON's specified encoding)
# instead of relying on the platform default.
with Path("civic/prognostic_evidence.json").open(encoding="utf-8") as f:
    p_data = json.load(f)

p_sub = p_data["clinicalImpactSubmission"][0]
p_sub_cpy = deepcopy(p_sub)
p_sub_cpy["clinicalImpactClassification"]["drugForTherapeuticAssertion"] = "afatinib"

dry_run_test_api({"clinicalImpactSubmission": [p_sub_cpy]}).status_code


204