Skip to content

Commit

Permalink
Merge pull request #645 from biolink/go-site-2066-gorule-0000001-inva…
Browse files Browse the repository at this point in the history
…lid-taxon-id-0

Go site 2066 gorule 0000001 invalid taxon id 0
  • Loading branch information
mugitty committed Sep 13, 2023
2 parents 5d15999 + d721f9d commit 7d53dd3
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 1 deletion.
5 changes: 4 additions & 1 deletion ontobio/io/gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,10 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
return assocparser.ParseResult(source_line, [], True, report=report)

taxon = parsed_taxons_result.parsed[0]

if taxon.identity is None or taxon.identity == '0':
report.error(source_line, Report.INVALID_TAXON, parsed_taxons_result.original, parsed_taxons_result.message, taxon=parsed_taxons_result.original, rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

date = assocparser.parse_date(gaf_line[13], report, source_line)
if date is None:
return assocparser.ParseResult(source_line, [], True, report=report)
Expand Down
12 changes: 12 additions & 0 deletions ontobio/io/gpadparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,12 @@ def from_1_2(gpad_line: List[str], report=None, group="unknown", dataset="unknow
if entity is not None:
subject = entity
taxon = subject.taxon

#Ensure taxon is valid, if we are reading from bioentity
if len(bio_entities.entities) > 0:
if taxon.identity is None or taxon.identity == '0':
report.error(source_line, Report.INVALID_TAXON, "None or 0", "Taxon is invalid", rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

go_term = association.Curie.from_str(gpad_line[3])
if go_term.is_error():
Expand Down Expand Up @@ -425,6 +431,12 @@ def from_2_0(gpad_line: List[str], report=None, group="unknown", dataset="unknow
# If we found a subject entity, then set `subject` to the found entity
subject = entity
taxon = subject.taxon

#Ensure taxon is valid, if we are reading from bioentity
if len(bio_entities.entities) > 0:
if taxon.identity is None or taxon.identity == '0':
report.error(source_line, Report.INVALID_TAXON, "None or 0", "Taxon is invalid", rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

negated = gpad_line[1] == "NOT"

Expand Down
78 changes: 78 additions & 0 deletions tests/test_gpad_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from ontobio.model import association
from ontobio.model.association import ConjunctiveSet, ExtensionUnit, Curie
from ontobio.ontol_factory import OntologyFactory
from ontobio.model import collections
from ontobio.model.association import Curie, Subject

import yaml

Expand Down Expand Up @@ -252,3 +254,79 @@ def test_unmapped_eco_to_gaf_codes():
vals[5] = "ECO:0006003" # indirectly maps to IDA via gaf-eco-mapping-derived.txt
result = parser.parse_line("\t".join(vals))
assert len(result.associations) == 1

def test_gpi_check():
    """Check that GPAD lines are skipped when the bioentity taxon is invalid.

    The same ``report`` object is passed to every ``to_association`` call, so
    the ERROR counts asserted below are cumulative across the scenarios:

    1. GPAD 1.2 line, matching bioentity with taxon ``taxon:0`` -> skipped,
       1 error in total.
    2. GPAD 2.0 line, same bioentity -> skipped, 2 errors in total.
    3. GPAD 2.0 line, bioentities that do not contain the subject
       -> skipped, 3 errors in total.
    4. GPAD 2.0 line, matching bioentity with taxon ``NCBITaxon:12345``
       -> parsed into one association, still 3 errors in total.
    """
    report = assocparser.Report(group="unknown", dataset="unknown")
    subject_curie = "ZFIN:ZDB-GENE-070117-1552"

    def error_count(result):
        # Number of ERROR-level messages accumulated in the result's report.
        return sum(1 for m in result.report.messages if m["level"] == "ERROR")

    def bioentities_with_taxon(taxon_curie):
        # Bioentity collection holding only the test subject, with the given taxon.
        return collections.BioEntities({
            Curie("ZFIN", "ZDB-GENE-070117-1552"): Subject(
                Curie.from_str(subject_curie),
                "ste4",
                ["adaptor protein Ste4"],
                [],
                ["protein"],
                Curie.from_str(taxon_curie),
            )
        })

    # --- Scenario 1: GPAD 1.2 line, bioentity taxon is "taxon:0" ---
    vals = [
        "ZFIN",
        "ZDB-GENE-070117-1552",
        "acts_upstream_of_or_within",
        "GO:0045601",
        "PMID:17531218",
        "ECO:0000307",
        "",
        "",
        "20080326",
        "ZFIN",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511",
    ]
    bioentities = bioentities_with_taxon("taxon:0")

    result = to_association(list(vals), report=report, version="1.2", bio_entities=bioentities)
    assert result.skipped
    assert error_count(result) == 1
    assert len(result.associations) == 0

    # --- Scenario 2: GPAD 2.0 line, same invalid-taxon bioentity ---
    vals = [
        "ZFIN:ZDB-GENE-070117-1552",
        "ZFIN:ZDB-GENE-070117-1552",
        "RO:12345",
        "GO:0045601",
        "PMID:17531218",
        "ECO:0000307",
        "",
        "",
        "2008-03-26",
        "ZFIN",
        "",
        "creation-date=2020-09-17|modification-date=2020-09-17|contributor-id=http://orcid.org/0000-0003-2689-5511",
    ]
    result = to_association(list(vals), report=report, version="2.0", bio_entities=bioentities)
    assert result.skipped
    assert error_count(result) == 2
    assert len(result.associations) == 0

    # --- Scenario 3: non-empty bioentities that lack the subject ---
    # NOTE(review): presumably the line is rejected because no valid taxon can
    # be resolved for the subject -- confirm against gpadparser.from_2_0.
    bioentities = collections.BioEntities({"bla": 'blabla'})

    result = to_association(list(vals), report=report, version="2.0", bio_entities=bioentities)
    assert result.skipped
    assert error_count(result) == 3
    assert len(result.associations) == 0

    # --- Scenario 4: bioentity with a usable taxon -> line is accepted ---
    bioentities = bioentities_with_taxon("NCBITaxon:12345")

    result = to_association(list(vals), report=report, version="2.0", bio_entities=bioentities)
    assert not result.skipped
    assert error_count(result) == 3  # unchanged: no new error was reported
    assert len(result.associations) == 1


0 comments on commit 7d53dd3

Please sign in to comment.