-
Notifications
You must be signed in to change notification settings - Fork 30
/
test_parse_ids.py
107 lines (82 loc) · 4.43 KB
/
test_parse_ids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import io
import json
from ontobio.io import assocparser
from ontobio.io import gafparser
from ontobio.io import assocwriter
from ontobio.model import association
def test_no_colon_in_id():
    """An ID with no colon fails validation and yields exactly one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("FOOBAR", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_empty_id():
    """The empty string fails validation and yields exactly one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_pipe_in_id():
    """An ID containing a pipe character fails validation with one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("F|OO:123", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_bad_character_in_id():
    """An ID with ``&`` in its local part fails validation with one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("FOO:1&23", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_empty_post_colon():
    """An ID with nothing after the colon fails validation with one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id("FOO:", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_empty_pre_colon():
    """An ID with nothing before the colon fails validation with one ERROR message."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    is_valid = gaf_parser._validate_id(":123", blank_line)

    assert not is_valid
    messages = gaf_parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.ERROR
def test_validate_with_allowed_ids():
    """An ID whose prefix is listed in ``allowed_ids`` passes validation."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    assert gaf_parser._validate_id("FOO:123", blank_line, allowed_ids=["FOO"])
def test_validate_with_disallowed_id():
    """An ID whose prefix is not in ``allowed_ids`` produces exactly one WARNING.

    The return value of ``_validate_id`` is not inspected by this test; only
    the parser's report is checked.
    """
    parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    # Called for its effect on the report; the result was previously bound to
    # an unused local.
    parser._validate_id("FOO:123", split_line, allowed_ids=["BAR"])

    messages = parser.report.messages
    assert len(messages) == 1
    assert messages[0]["level"] == assocparser.Report.WARNING
def test_validate_pipe_separated():
    """Well-formed single and pipe-separated ID fields are returned as their IDs.

    Results are compared as sets, so the order of the returned IDs is not
    pinned by this test.
    """
    parser = gafparser.GafParser()
    cases = [
        ("PMID:12345", {"PMID:12345"}),
        ("PMID:12345|PMID:11111", {"PMID:12345", "PMID:11111"}),
    ]
    for field, expected in cases:
        # A fresh SplitLine is built for every call, as in the original test.
        split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")
        ids = parser.validate_pipe_separated_ids(field, split_line)
        assert set(ids) == expected
def test_validate_pipe_separated_with_bad_ids():
    """A field containing a malformed ID makes the whole validation return None.

    Covers both a bad ID mixed with a good one and a bad ID on its own.
    """
    parser = gafparser.GafParser()
    for field in ("PMID:123[2]|PMID:11111", "PMID:123[2]"):
        # A fresh SplitLine is built for every call, as in the original test.
        split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")
        ids = parser.validate_pipe_separated_ids(field, split_line)
        # Identity check: None is a singleton and should not be compared with ==.
        assert ids is None
def test_validate_pipe_separated_empty_allowed():
    """With ``empty_allowed=True`` an empty field validates to an empty list."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    result = gaf_parser.validate_pipe_separated_ids("", blank_line, empty_allowed=True)

    assert result == []
def test_validate_pipe_with_additional_delims():
    """Extra delimiters split IDs alongside the pipe, and parsing groups them.

    First checks that ``extra_delims=","`` makes ``validate_pipe_separated_ids``
    return every ID from a field mixing ``,`` and ``|``. Then parses a full GAF
    line and checks that ``evidence.with_support_from`` holds one
    ``ConjunctiveSet`` per pipe-separated group, with comma-separated IDs
    sharing a set.
    """
    parser = gafparser.GafParser()
    split_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")
    ids = parser.validate_pipe_separated_ids("F:123,B:234|B:111", split_line, extra_delims=",")
    assert set(ids) == {"F:123", "B:234", "B:111"}

    gaf_line = "PomBase\tSPAC25B8.17\typf1\t\tGO:1990578\tGO_REF:0000024\tISO\tUniProtKB:Q9CXD9|ensembl:ENSMUSP00000038569,PMID:11111\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20150305\tPomBase\t\t"
    result = parser.parse_line(gaf_line)

    expected = [
        association.ConjunctiveSet(elements=[association.Curie.from_str("UniProtKB:Q9CXD9")]),
        association.ConjunctiveSet(
            elements=[
                association.Curie.from_str("ensembl:ENSMUSP00000038569"),
                association.Curie.from_str("PMID:11111"),
            ]
        ),
    ]
    assert result.associations[0].evidence.with_support_from == expected
def test_doi_id():
    """A DOI identifier containing ``.`` and ``/`` passes validation."""
    gaf_parser = gafparser.GafParser()
    blank_line = assocparser.SplitLine("", [""] * 17, "taxon:foo")

    assert gaf_parser._validate_id("DOI:10.1007/BF00127499", blank_line)