From 3eb47067a935f647f2edf9aa8dd1063a6d632560 Mon Sep 17 00:00:00 2001 From: William Hayes Date: Fri, 2 Oct 2020 08:28:37 -0400 Subject: [PATCH] validation bugfixes --- bel/lang/ast.py | 13 +- bel/nanopub/validate.py | 6 + bel/resources/manage.py | 6 +- bel/terms/terms.py | 8 +- tests/lang/test_ast.py | 42 ++++- tests/nanopub/test_nanopub_validation.py | 212 +++++++++++------------ 6 files changed, 168 insertions(+), 119 deletions(-) diff --git a/bel/lang/ast.py b/bel/lang/ast.py index f8e4ec2..6d10542 100644 --- a/bel/lang/ast.py +++ b/bel/lang/ast.py @@ -1127,7 +1127,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis ValidationError( type="Assertion", severity="Error", - msg=f"Namespace value: {fn_arg.entity.nsval} with entity_types {fn_arg.entity.entity_types} are not allowed for function {fn_arg.parent.name} as an optional or multiple argument", + msg=f"BEL Entity: {fn_arg.entity.nsval} with entity_types {fn_arg.entity.entity_types} are not allowed for function {fn_arg.parent.name} as an optional or multiple argument", visual_pairs=[(fn.span.start, fn.span.end)], index=fn.span.start, ) @@ -1140,7 +1140,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis ValidationError( type="Assertion", severity="Warning", - msg=f"Unknown BEL entity {fn_arg.entity.nsval.key_label} - cannot determine if this matches function signature", + msg=f"Unknown BEL Entity {fn_arg.entity.nsval.key_label} - cannot determine if this matches function signature", visual_pairs=[(fn.span.start, fn.span.end)], index=fn.span.start, ) @@ -1163,7 +1163,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis ValidationError( type="Assertion", severity="Warning", - msg=f"Unknown namespace value '{fn.args[position].entity.nsval.key_label}' for the {fn.name} function at position {fn.args[position].span.namespace.start}", + msg=f"Unknown BEL Entity '{fn.args[position].entity.nsval.key_label}' for the {fn.name} function at position {fn.args[position].span.namespace.start}", visual_pairs=[ ( fn.args[position].span.namespace.start, @@ -1184,11 +1184,16 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis ) ): + if fn.args[position].entity.term: + error_msg = f"Wrong entity type for BEL Entity at argument position {position} for function {fn.name} - expected {argument['values']}, actual: entity_types: {fn.args[position].entity.entity_types}" + else: + error_msg = f"Unknown BEL Entity at argument position {position} for function {fn.name} - cannot determine if correct entity type." + errors.append( ValidationError( type="Assertion", severity="Warning", - msg=f"Wrong entity type for namespace argument at position {position} for function {fn.name} - expected {argument['values']}, actual: entity_types: {fn.args[position].entity.entity_types}", + msg=error_msg, visual_pairs=[(fn.args[position].span.start, fn.args[position].span.end)], index=fn.args[position].span.start, ) diff --git a/bel/nanopub/validate.py b/bel/nanopub/validate.py index f6091ef..559795b 100644 --- a/bel/nanopub/validate.py +++ b/bel/nanopub/validate.py @@ -436,3 +436,9 @@ def validate(nanopub: NanopubR, validation_level: str = "complete") -> NanopubR: logger.exception(f"Could not validate nanopub: {nanopub.nanopub.id} error: {str(e)}") return nanopub + + +def remove_validation_cache(): + """Truncate validation cache""" + + bel_validations_coll.truncate() diff --git a/bel/resources/manage.py b/bel/resources/manage.py index a688826..d3fafc7 100644 --- a/bel/resources/manage.py +++ b/bel/resources/manage.py @@ -107,8 +107,10 @@ def update_resources(urls: List[str] = None, force: bool = False, email: str = N for resource in resources: if "resource_download_url" not in resource: continue - url = resource["resource_download_url"] - results[url] = load_resource(resource_url=url, force=force) + logger.info(f"Resource {resource}") + url = resource["resource_download_url"] + + # results[url] = load_resource(resource_url=url, force=force) if email is not None: subject = f"BEL Resources Update for {settings.HOST_NAME}" diff --git a/bel/terms/terms.py b/bel/terms/terms.py index 6a993ff..fcf82af 100644 --- a/bel/terms/terms.py +++ b/bel/terms/terms.py @@ -79,9 +79,11 @@ def get_term(term_key: Key) -> Optional[Term]: # duration = f"{time2 - time1:.5f}" # logger.debug(f"Get terms timing {duration} for {term_key}", term_key=term_key, duration=duration) - # Filter out any terms resulting from obsolete ids - if len(terms) > 0: - terms = [term for term in terms if term_key not in term.obsolete_keys] + # Filter out any terms resulting from obsolete ids if more than 1 term + if len(terms) > 1: + check_terms = [term for term in terms if term_key not in term.obsolete_keys] + if len(check_terms) > 0: + terms = check_terms if len(terms) == 1: return terms[0] diff --git a/tests/lang/test_ast.py b/tests/lang/test_ast.py index 121adc6..5b969fc 100644 --- a/tests/lang/test_ast.py +++ b/tests/lang/test_ast.py @@ -88,7 +88,9 @@ def test_ast_parse_fus(): print("To String", ast.to_string()) - assert ast.to_string() == False + assert ( + ast.to_string() == "act(p(fus(HGNC:EWSR1!EWSR1, start, HGNC:FLI1!FLI1, end)), ma(tscript))" + ) ##################################################################################### @@ -123,7 +125,7 @@ def test_validate_missing_namespace(): assert ( ast.errors[0].msg - == "Unknown namespace value 'missing:AKT1' for the proteinAbundance function at position 2" + == "Unknown BEL Entity 'missing:AKT1' for the proteinAbundance function at position 2" ) assert ast.errors[0].severity == "Warning" @@ -240,7 +242,7 @@ def test_validate_complex_missing_namespace(): assertion = AssertionStr(subject="complex(UNKNOWN:test)") expected = ( - "Unknown namespace value UNKNOWN:test - cannot determine if this matches function signature" + "Unknown BEL Entity UNKNOWN:test - cannot determine if this matches function signature" ) ast = bel.lang.ast.BELAst(assertion=assertion) @@ -298,12 +300,24 @@ def test_validation_tloc(): assert ast.errors == [] + assertion = AssertionStr( + subject='tloc(p(HGNC:NFE2L2), fromLoc(MESH:Cytoplasm), toLoc(MESH:"Cell Nucleus"))' + ) + + ast = bel.lang.ast.BELAst(assertion=assertion) + + ast.validate() + + print("Errors", ast.errors) + + assert ast.errors == [] + def test_validate_fus(): """Validate path()""" assertion = AssertionStr(subject='p(fus(HGNC:NPM, "1_117", HGNC:ALK, end))') - expected = "Wrong entity type for namespace argument at position 0 for function fusion - expected ['Gene', 'RNA', 'Micro_RNA', 'Protein'], actual: entity_types: []" + expected = "Unknown BEL Entity at argument position 0 for function fusion - cannot determine if correct entity type." ast = bel.lang.ast.BELAst(assertion=assertion) @@ -313,6 +327,16 @@ def test_validate_fus(): assert ast.errors[0].msg == expected + assertion = AssertionStr(subject="p(fus(HGNC:EWSR1, start, HGNC:FLI1, end))") + + ast = bel.lang.ast.BELAst(assertion=assertion) + + ast.validate() + + print("Errors", ast.errors) + + assert ast.errors == [] + def test_validate_nsarg(): """Validate path()""" @@ -393,6 +417,16 @@ def test_validate_complex_nsarg(): assert ast.errors == [] + assertion = AssertionStr(subject='complex(GO:"transcription factor AP-1 complex")') + + ast = bel.lang.ast.BELAst(assertion=assertion) + + ast.validate() + + print("Errors", ast.errors) + + assert ast.errors == [] + def test_validate_bad_relation(): diff --git a/tests/nanopub/test_nanopub_validation.py b/tests/nanopub/test_nanopub_validation.py index ad924e5..0b1ae7b 100644 --- a/tests/nanopub/test_nanopub_validation.py +++ b/tests/nanopub/test_nanopub_validation.py @@ -87,109 +87,109 @@ def test_validate_nanopub(): ) -def test_validate_nanopub2(): - - nanopub = json.loads( - """{ - "rev": "_bLpe16a--_", - "owners": [ - { - "user_id": "303928642", - "first_name": "Wendy", - "last_name": "Zimmerman", - "full_name": " Wendy Zimmerman" - } - ], - "is_deleted": false, - "is_archived": null, - "is_public": false, - "source_url": "https://nanopubstore.thor.biodati.com/nanopub/01EAAA7EJZC8B7EF5T78FN53JR", - "nanopub": { - "type": { - "name": "BEL", - "version": "2.1.0" - }, - "citation": { - "id": null, - "authors": [ - "Knoop, L L", - "Baker, S J" - ], - "database": { - "name": "PubMed", - "id": "10827180" - }, - "reference": "J Biol Chem 2000 Aug 11 275(32) 24865-71", - "uri": null, - "title": "The splicing factor U1C represses EWS/FLI-mediated transactivation.", - "source_name": "The Journal of biological chemistry", - "date_published": "2000-08-11", - "abstract": "" - }, - "assertions": [ - { - "subject": "p(HGNC:SNRPC)", - "relation": "decreases", - "object": "act(p(fus(HGNC:EWSR1, start, HGNC:FLI1, end)), ma(tscript))", - "validation": null - } - ], - "id": "01EAAA7EJZC8B7EF5T78FN53JR", - "schema_uri": "https://raw.githubusercontent.com/belbio/schemas/master/schemas/nanopub_bel-1.1.0.yaml", - "annotations": [ - { - "type": "Species", - "label": "human", - "id": "TAX:9606", - "validation": null - } - ], - "evidence": "Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications.", - "metadata": { - "collections": [ - "corrected", - "Selventa-Full" - ], - "gd_status": "finalized", - "gd_createTS": "2020-06-08T15:54:17.566Z", - "gd_updateTS": "2020-06-09T14:21:38.573Z", - "gd_validation": { - "status": "Good", - "errors": null, - "validation_target": null - }, - "gd_hash": "73b5b7b36f9bf6a6", - "statement_group": "67265439", - "gd_abstract": "EWS is an RNA-binding protein involved in human tumor-specific chromosomal translocations. In approximately 85% of Ewing's sarcomas, such translocations give rise to the chimeric gene EWS/FLI. In the resulting fusion protein, the RNA binding domains from the C terminus of EWS are replaced by the DNA-binding domain of the ETS protein FLI-1. EWS/FLI can function as a transcription factor with the same DNA binding specificity as FLI-1. EWS and EWS/FLI can associate with the RNA polymerase II holoenzyme as well as with SF1, an essential splicing factor. Here we report that U1C, one of three human U1 small nuclear ribonucleoprotein-specific proteins, interacts in vitro and in vivo with both EWS and EWS/FLI. U1C interacts with other splicing factors and is important in the early stages of spliceosome formation. Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications. Our findings demonstrate that U1C, a well characterized splicing protein, can also function in transcriptional regulation. Furthermore, they suggest that EWS and EWS/FLI may function both in transcriptional and post-transcriptional processes.", - "gd_creator": "303928642" - } - } - }""" - ) - - nanopub_validated = bel.nanopub.validate.validate(NanopubR(**nanopub), validation_level="force") - - nanopub_validated_dict = nanopub_validated.dict() - - print("Validated Nanopub2:\n", nanopub_validated.json(indent=4)) - - assert False - - # assert nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["status"] == "Error" - # assert ( - # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["msg"] - # == "Too many close parentheses at index 25" - # ) - # assert ( - # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["visual"] - # == 'act(p(SP:AKT1_HUMAN), ma)) increases act(p(SPX:AKT1_HUMAN)' - # ) - - # assert ( - # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][0]["msg"] - # == "Annotation term HGNC:A2MP is obsolete - please replace with HGNC:8" - # ) - # assert ( - # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][1]["msg"] - # == "Annotation type: Disease for HGNC:A2MP does not match annotation types in database: []" - # ) +# def test_validate_nanopub2(): + +# nanopub = json.loads( +# """{ +# "rev": "_bLpe16a--_", +# "owners": [ +# { +# "user_id": "303928642", +# "first_name": "Wendy", +# "last_name": "Zimmerman", +# "full_name": " Wendy Zimmerman" +# } +# ], +# "is_deleted": false, +# "is_archived": null, +# "is_public": false, +# "source_url": "https://nanopubstore.thor.biodati.com/nanopub/01EAAA7EJZC8B7EF5T78FN53JR", +# "nanopub": { +# "type": { +# "name": "BEL", +# "version": "2.1.0" +# }, +# "citation": { +# "id": null, +# "authors": [ +# "Knoop, L L", +# "Baker, S J" +# ], +# "database": { +# "name": "PubMed", +# "id": "10827180" +# }, +# "reference": "J Biol Chem 2000 Aug 11 275(32) 24865-71", +# "uri": null, +# "title": "The splicing factor U1C represses EWS/FLI-mediated transactivation.", +# "source_name": "The Journal of biological chemistry", +# "date_published": "2000-08-11", +# "abstract": "" +# }, +# "assertions": [ +# { +# "subject": "p(HGNC:SNRPC)", +# "relation": "decreases", +# "object": "act(p(fus(HGNC:EWSR1, start, HGNC:FLI1, end)), ma(tscript))", +# "validation": null +# } +# ], +# "id": "01EAAA7EJZC8B7EF5T78FN53JR", +# "schema_uri": "https://raw.githubusercontent.com/belbio/schemas/master/schemas/nanopub_bel-1.1.0.yaml", +# "annotations": [ +# { +# "type": "Species", +# "label": "human", +# "id": "TAX:9606", +# "validation": null +# } +# ], +# "evidence": "Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications.", +# "metadata": { +# "collections": [ +# "corrected", +# "Selventa-Full" +# ], +# "gd_status": "finalized", +# "gd_createTS": "2020-06-08T15:54:17.566Z", +# "gd_updateTS": "2020-06-09T14:21:38.573Z", +# "gd_validation": { +# "status": "Good", +# "errors": null, +# "validation_target": null +# }, +# "gd_hash": "73b5b7b36f9bf6a6", +# "statement_group": "67265439", +# "gd_abstract": "EWS is an RNA-binding protein involved in human tumor-specific chromosomal translocations. In approximately 85% of Ewing's sarcomas, such translocations give rise to the chimeric gene EWS/FLI. In the resulting fusion protein, the RNA binding domains from the C terminus of EWS are replaced by the DNA-binding domain of the ETS protein FLI-1. EWS/FLI can function as a transcription factor with the same DNA binding specificity as FLI-1. EWS and EWS/FLI can associate with the RNA polymerase II holoenzyme as well as with SF1, an essential splicing factor. Here we report that U1C, one of three human U1 small nuclear ribonucleoprotein-specific proteins, interacts in vitro and in vivo with both EWS and EWS/FLI. U1C interacts with other splicing factors and is important in the early stages of spliceosome formation. Importantly, co-expression of U1C represses EWS/FLI-mediated transactivation, demonstrating that this interaction can have functional ramifications. Our findings demonstrate that U1C, a well characterized splicing protein, can also function in transcriptional regulation. Furthermore, they suggest that EWS and EWS/FLI may function both in transcriptional and post-transcriptional processes.", +# "gd_creator": "303928642" +# } +# } +# }""" +# ) + +# nanopub_validated = bel.nanopub.validate.validate(NanopubR(**nanopub), validation_level="force") + +# nanopub_validated_dict = nanopub_validated.dict() + +# print("Validated Nanopub2:\n", nanopub_validated.json(indent=4)) + +# assert False + +# # assert nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["status"] == "Error" +# # assert ( +# # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["msg"] +# # == "Too many close parentheses at index 25" +# # ) +# # assert ( +# # nanopub_validated_dict["nanopub"]["assertions"][0]["validation"]["errors"][0]["visual"] +# # == 'act(p(SP:AKT1_HUMAN), ma)) increases act(p(SPX:AKT1_HUMAN)' +# # ) + +# # assert ( +# # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][0]["msg"] +# # == "Annotation term HGNC:A2MP is obsolete - please replace with HGNC:8" +# # ) +# # assert ( +# # nanopub_validated_dict["nanopub"]["annotations"][1]["validation"]["errors"][1]["msg"] +# # == "Annotation type: Disease for HGNC:A2MP does not match annotation types in database: []" +# # )