Skip to content

Commit

Permalink
validation bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
wshayes committed Oct 2, 2020
1 parent 10d2185 commit 3eb4706
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 119 deletions.
13 changes: 9 additions & 4 deletions bel/lang/ast.py
Expand Up @@ -1127,7 +1127,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
ValidationError(
type="Assertion",
severity="Error",
msg=f"Namespace value: {fn_arg.entity.nsval} with entity_types {fn_arg.entity.entity_types} are not allowed for function {fn_arg.parent.name} as an optional or multiple argument",
msg=f"BEL Entity: {fn_arg.entity.nsval} with entity_types {fn_arg.entity.entity_types} are not allowed for function {fn_arg.parent.name} as an optional or multiple argument",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
)
Expand All @@ -1140,7 +1140,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
ValidationError(
type="Assertion",
severity="Warning",
msg=f"Unknown BEL entity {fn_arg.entity.nsval.key_label} - cannot determine if this matches function signature",
msg=f"Unknown BEL Entity {fn_arg.entity.nsval.key_label} - cannot determine if this matches function signature",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
)
Expand All @@ -1163,7 +1163,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
ValidationError(
type="Assertion",
severity="Warning",
msg=f"Unknown namespace value '{fn.args[position].entity.nsval.key_label}' for the {fn.name} function at position {fn.args[position].span.namespace.start}",
msg=f"Unknown BEL Entity '{fn.args[position].entity.nsval.key_label}' for the {fn.name} function at position {fn.args[position].span.namespace.start}",
visual_pairs=[
(
fn.args[position].span.namespace.start,
Expand All @@ -1184,11 +1184,16 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
)
):

if fn.args[position].entity.term:
error_msg = f"Wrong entity type for BEL Entity at argument position {position} for function {fn.name} - expected {argument['values']}, actual: entity_types: {fn.args[position].entity.entity_types}"
else:
error_msg = f"Unknown BEL Entity at argument position {position} for function {fn.name} - cannot determine if correct entity type."

errors.append(
ValidationError(
type="Assertion",
severity="Warning",
msg=f"Wrong entity type for namespace argument at position {position} for function {fn.name} - expected {argument['values']}, actual: entity_types: {fn.args[position].entity.entity_types}",
msg=error_msg,
visual_pairs=[(fn.args[position].span.start, fn.args[position].span.end)],
index=fn.args[position].span.start,
)
Expand Down
6 changes: 6 additions & 0 deletions bel/nanopub/validate.py
Expand Up @@ -436,3 +436,9 @@ def validate(nanopub: NanopubR, validation_level: str = "complete") -> NanopubR:
logger.exception(f"Could not validate nanopub: {nanopub.nanopub.id} error: {str(e)}")

return nanopub


def remove_validation_cache():
    """Empty the validation cache.

    Calls ``truncate()`` on the module-level ``bel_validations_coll`` object;
    nothing is returned.
    """
    # NOTE(review): presumably this drops every cached validation result so
    # later validations are recomputed - confirm against the collection setup.
    bel_validations_coll.truncate()
6 changes: 4 additions & 2 deletions bel/resources/manage.py
Expand Up @@ -107,8 +107,10 @@ def update_resources(urls: List[str] = None, force: bool = False, email: str = N
for resource in resources:
if "resource_download_url" not in resource:
continue
url = resource["resource_download_url"]
results[url] = load_resource(resource_url=url, force=force)
logger.info(f"Resource {resource}")
url = resource["resource_download_url"]

# results[url] = load_resource(resource_url=url, force=force)

if email is not None:
subject = f"BEL Resources Update for {settings.HOST_NAME}"
Expand Down
8 changes: 5 additions & 3 deletions bel/terms/terms.py
Expand Up @@ -79,9 +79,11 @@ def get_term(term_key: Key) -> Optional[Term]:
# duration = f"{time2 - time1:.5f}"
# logger.debug(f"Get terms timing {duration} for {term_key}", term_key=term_key, duration=duration)

# Filter out any terms resulting from obsolete ids
if len(terms) > 0:
terms = [term for term in terms if term_key not in term.obsolete_keys]
# Filter out any terms resulting from obsolete ids if more than 1 term
if len(terms) > 1:
check_terms = [term for term in terms if term_key not in term.obsolete_keys]
if len(check_terms) > 0:
terms = check_terms

if len(terms) == 1:
return terms[0]
Expand Down
42 changes: 38 additions & 4 deletions tests/lang/test_ast.py
Expand Up @@ -88,7 +88,9 @@ def test_ast_parse_fus():

print("To String", ast.to_string())

assert ast.to_string() == False
assert (
ast.to_string() == "act(p(fus(HGNC:EWSR1!EWSR1, start, HGNC:FLI1!FLI1, end)), ma(tscript))"
)


#####################################################################################
Expand Down Expand Up @@ -123,7 +125,7 @@ def test_validate_missing_namespace():

assert (
ast.errors[0].msg
== "Unknown namespace value 'missing:AKT1' for the proteinAbundance function at position 2"
== "Unknown BEL Entity 'missing:AKT1' for the proteinAbundance function at position 2"
)
assert ast.errors[0].severity == "Warning"

Expand Down Expand Up @@ -240,7 +242,7 @@ def test_validate_complex_missing_namespace():

assertion = AssertionStr(subject="complex(UNKNOWN:test)")
expected = (
"Unknown namespace value UNKNOWN:test - cannot determine if this matches function signature"
"Unknown BEL Entity UNKNOWN:test - cannot determine if this matches function signature"
)
ast = bel.lang.ast.BELAst(assertion=assertion)

Expand Down Expand Up @@ -298,12 +300,24 @@ def test_validation_tloc():

assert ast.errors == []

assertion = AssertionStr(
subject='tloc(p(HGNC:NFE2L2), fromLoc(MESH:Cytoplasm), toLoc(MESH:"Cell Nucleus"))'
)

ast = bel.lang.ast.BELAst(assertion=assertion)

ast.validate()

print("Errors", ast.errors)

assert ast.errors == []


def test_validate_fus():
"""Validate path()"""

assertion = AssertionStr(subject='p(fus(HGNC:NPM, "1_117", HGNC:ALK, end))')
expected = "Wrong entity type for namespace argument at position 0 for function fusion - expected ['Gene', 'RNA', 'Micro_RNA', 'Protein'], actual: entity_types: []"
expected = "Unknown BEL Entity at argument position 0 for function fusion - cannot determine if correct entity type."

ast = bel.lang.ast.BELAst(assertion=assertion)

Expand All @@ -313,6 +327,16 @@ def test_validate_fus():

assert ast.errors[0].msg == expected

assertion = AssertionStr(subject="p(fus(HGNC:EWSR1, start, HGNC:FLI1, end))")

ast = bel.lang.ast.BELAst(assertion=assertion)

ast.validate()

print("Errors", ast.errors)

assert ast.errors == []


def test_validate_nsarg():
"""Validate path()"""
Expand Down Expand Up @@ -393,6 +417,16 @@ def test_validate_complex_nsarg():

assert ast.errors == []

assertion = AssertionStr(subject='complex(GO:"transcription factor AP-1 complex")')

ast = bel.lang.ast.BELAst(assertion=assertion)

ast.validate()

print("Errors", ast.errors)

assert ast.errors == []


def test_validate_bad_relation():

Expand Down
212 changes: 106 additions & 106 deletions tests/nanopub/test_nanopub_validation.py
Expand Up @@ -87,109 +87,109 @@ def test_validate_nanopub():
)


def test_validate_nanopub2():
    """Validate a nanopub whose assertion object contains a ``fus()`` protein fusion.

    The nanopub is run through ``bel.nanopub.validate.validate`` with
    ``validation_level="force"`` and the validation attached to its single
    assertion is checked. The previous version ended in an unconditional
    ``assert False`` (a debugging aid used to surface the printed output), so
    the test could never pass.
    """

    nanopub = json.loads(
        """{
"rev": "_bLpe16a--_",
"owners": [
{
"user_id": "303928642",
"first_name": "Wendy",
"last_name": "Zimmerman",
"full_name": " Wendy Zimmerman"
}
],
"is_deleted": false,
"is_archived": null,
"is_public": false,
"source_url": "https://nanopubstore.thor.biodati.com/nanopub/01EAAA7EJZC8B7EF5T78FN53JR",
"nanopub": {
"type": {
"name": "BEL",
"version": "2.1.0"
},
"citation": {
"id": null,
"authors": [
"Knoop, L L",
"Baker, S J"
],
"database": {
"name": "PubMed",
"id": "10827180"
},
"reference": "J Biol Chem 2000 Aug 11 275(32) 24865-71",
"uri": null,
"title": "The splicing factor U1C represses EWS/FLI-mediated transactivation.",
"source_name": "The Journal of biological chemistry",
"date_published": "2000-08-11",
"abstract": ""
},
"assertions": [
{
"subject": "p(HGNC:SNRPC)",
"relation": "decreases",
"object": "act(p(fus(HGNC:EWSR1, start, HGNC:FLI1, end)), ma(tscript))",
"validation": null
}
],
"id": "01EAAA7EJZC8B7EF5T78FN53JR",
"schema_uri": "https://raw.githubusercontent.com/belbio/schemas/master/schemas/nanopub_bel-1.1.0.yaml",
"annotations": [
{
"type": "Species",
"label": "human",
"id": "TAX:9606",
"validation": null
}
],
"evidence": "Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications.",
"metadata": {
"collections": [
"corrected",
"Selventa-Full"
],
"gd_status": "finalized",
"gd_createTS": "2020-06-08T15:54:17.566Z",
"gd_updateTS": "2020-06-09T14:21:38.573Z",
"gd_validation": {
"status": "Good",
"errors": null,
"validation_target": null
},
"gd_hash": "73b5b7b36f9bf6a6",
"statement_group": "67265439",
"gd_abstract": "EWS is an RNA-binding protein involved in human tumor-specific chromosomal translocations. In approximately 85% of Ewing's sarcomas, such translocations give rise to the chimeric gene EWS/FLI. In the resulting fusion protein, the RNA binding domains from the C terminus of EWS are replaced by the DNA-binding domain of the ETS protein FLI-1. EWS/FLI can function as a transcription factor with the same DNA binding specificity as FLI-1. EWS and EWS/FLI can associate with the RNA polymerase II holoenzyme as well as with SF1, an essential splicing factor. Here we report that U1C, one of three human U1 small nuclear ribonucleoprotein-specific proteins, interacts in vitro and in vivo with both EWS and EWS/FLI. U1C interacts with other splicing factors and is important in the early stages of spliceosome formation. Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications. Our findings demonstrate that U1C, a well characterized splicing protein, can also function in transcriptional regulation. Furthermore, they suggest that EWS and EWS/FLI may function both in transcriptional and post-transcriptional processes.",
"gd_creator": "303928642"
}
}
}"""
    )

    nanopub_validated = bel.nanopub.validate.validate(NanopubR(**nanopub), validation_level="force")

    nanopub_validated_dict = nanopub_validated.dict()

    # Kept for debugging: pytest shows captured stdout when the test fails.
    print("Validated Nanopub2:\n", nanopub_validated.json(indent=4))

    assertion_validation = nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]

    # NOTE(review): assumes forced validation always attaches a validation
    # record to each assertion - confirm against bel.nanopub.validate.
    assert assertion_validation is not None

    # The fus(HGNC:EWSR1, start, HGNC:FLI1, end) form is expected to be valid
    # BEL, so validation must not report an Error for this assertion.
    # NOTE(review): a Warning is tolerated here because unknown-entity results
    # depend on which namespaces are loaded - tighten once that is pinned down.
    assert assertion_validation["status"] != "Error"
# def test_validate_nanopub2():

# nanopub = json.loads(
# """{
# "rev": "_bLpe16a--_",
# "owners": [
# {
# "user_id": "303928642",
# "first_name": "Wendy",
# "last_name": "Zimmerman",
# "full_name": " Wendy Zimmerman"
# }
# ],
# "is_deleted": false,
# "is_archived": null,
# "is_public": false,
# "source_url": "https://nanopubstore.thor.biodati.com/nanopub/01EAAA7EJZC8B7EF5T78FN53JR",
# "nanopub": {
# "type": {
# "name": "BEL",
# "version": "2.1.0"
# },
# "citation": {
# "id": null,
# "authors": [
# "Knoop, L L",
# "Baker, S J"
# ],
# "database": {
# "name": "PubMed",
# "id": "10827180"
# },
# "reference": "J Biol Chem 2000 Aug 11 275(32) 24865-71",
# "uri": null,
# "title": "The splicing factor U1C represses EWS/FLI-mediated transactivation.",
# "source_name": "The Journal of biological chemistry",
# "date_published": "2000-08-11",
# "abstract": ""
# },
# "assertions": [
# {
# "subject": "p(HGNC:SNRPC)",
# "relation": "decreases",
# "object": "act(p(fus(HGNC:EWSR1, start, HGNC:FLI1, end)), ma(tscript))",
# "validation": null
# }
# ],
# "id": "01EAAA7EJZC8B7EF5T78FN53JR",
# "schema_uri": "https://raw.githubusercontent.com/belbio/schemas/master/schemas/nanopub_bel-1.1.0.yaml",
# "annotations": [
# {
# "type": "Species",
# "label": "human",
# "id": "TAX:9606",
# "validation": null
# }
# ],
# "evidence": "Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications.",
# "metadata": {
# "collections": [
# "corrected",
# "Selventa-Full"
# ],
# "gd_status": "finalized",
# "gd_createTS": "2020-06-08T15:54:17.566Z",
# "gd_updateTS": "2020-06-09T14:21:38.573Z",
# "gd_validation": {
# "status": "Good",
# "errors": null,
# "validation_target": null
# },
# "gd_hash": "73b5b7b36f9bf6a6",
# "statement_group": "67265439",
# "gd_abstract": "EWS is an RNA-binding protein involved in human tumor-specific chromosomal translocations. In approximately 85% of Ewing's sarcomas, such translocations give rise to the chimeric gene EWS/FLI. In the resulting fusion protein, the RNA binding domains from the C terminus of EWS are replaced by the DNA-binding domain of the ETS protein FLI-1. EWS/FLI can function as a transcription factor with the same DNA binding specificity as FLI-1. EWS and EWS/FLI can associate with the RNA polymerase II holoenzyme as well as with SF1, an essential splicing factor. Here we report that U1C, one of three human U1 small nuclear ribonucleoprotein-specific proteins, interacts in vitro and in vivo with both EWS and EWS/FLI. U1C interacts with other splicing factors and is important in the early stages of spliceosome formation. Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications. Our findings demonstrate that U1C, a well characterized splicing protein, can also function in transcriptional regulation. Furthermore, they suggest that EWS and EWS/FLI may function both in transcriptional and post-transcriptional processes.",
# "gd_creator": "303928642"
# }
# }
# }"""
# )

# nanopub_validated = bel.nanopub.validate.validate(NanopubR(**nanopub), validation_level="force")

# nanopub_validated_dict = nanopub_validated.dict()

# print("Validated Nanopub2:\n", nanopub_validated.json(indent=4))

# assert False

# # assert nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["status"] == "Error"
# # assert (
# # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["msg"]
# # == "Too many close parentheses at index 25"
# # )
# # assert (
# # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["visual"]
# # == 'act(p(SP:AKT1_HUMAN), ma)<span class="accentuate">)</span> increases act(p(SPX:AKT1_HUMAN)'
# # )

# # assert (
# # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][0]["msg"]
# # == "Annotation term HGNC:A2MP is obsolete - please replace with HGNC:8"
# # )
# # assert (
# # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][1]["msg"]
# # == "Annotation type: Disease for HGNC:A2MP does not match annotation types in database: []"
# # )

0 comments on commit 3eb4706

Please sign in to comment.