-
Notifications
You must be signed in to change notification settings - Fork 30
/
test_collections.py
108 lines (88 loc) · 5.42 KB
/
test_collections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from ontobio.model import collections
from ontobio.model.association import Curie, Subject
from ontobio.io import assocparser, gafparser, gpadparser
def test_bioentities_get_when_empty():
    """Check that ``get`` on a BioEntities built from an empty dict returns None."""
    e = collections.BioEntities(dict())
    # Identity comparison: ``== None`` could be satisfied by any object whose
    # ``__eq__`` claims equality with None, which would hide a wrong return value.
    assert e.get(Curie("FOO", "123")) is None
def test_bioentities_get_when_exists():
    """Check that ``get`` returns the Subject stored under a key that is present."""
    stored = {
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien", Curie("NCBITaxon", "12345")
        ),
    }
    entities = collections.BioEntities(stored)

    # The lookup key is parsed from its string form rather than built from parts.
    found = entities.get(Curie.from_str("FOO:123"))
    expected = Subject(
        Curie("FOO", "123"), "hello", "world", [], "protien", Curie("NCBITaxon", "12345")
    )
    assert found == expected
def test_bioentities_merge():
    """Check merging of disjoint BioEntities and that a blank label is not taken over."""

    # Each helper call builds brand-new objects, so the expected values never
    # share instances with the collections being merged.
    def foo_subject():
        return Subject(Curie("FOO", "123"), "hello", "world", [], "protien", Curie("NCBITaxon", "12345"))

    def bar_subject(label):
        return Subject(Curie("BAR", "987"), label, "world", [], "protien", Curie("NCBITaxon", "999"))

    first = collections.BioEntities({Curie("FOO", "123"): foo_subject()})
    second = collections.BioEntities({Curie("BAR", "987"): bar_subject("goodbye")})

    merged = first.merge(second)
    expected_union = collections.BioEntities({
        Curie("FOO", "123"): foo_subject(),
        Curie("BAR", "987"): bar_subject("goodbye"),
    })
    assert merged == expected_union

    # Test that blank label does not overwrite existing label
    blank_label = collections.BioEntities({Curie("BAR", "987"): bar_subject("")})
    merged_blank = second.merge(blank_label)
    expected_kept = collections.BioEntities({Curie("BAR", "987"): bar_subject("goodbye")})
    assert merged_blank == expected_kept
def test_bioentities_merge_clobber():
    """Check that merging two collections with the same key keeps the merged-in Subject."""
    original = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "hello", "world", [], "protien", Curie("NCBITaxon", "12345")
        ),
    })
    incoming = collections.BioEntities({
        Curie("FOO", "123"): Subject(
            Curie("FOO", "123"), "different", "world", [], "dog", Curie("NCBITaxon", "12345")
        ),
    })

    # Get the clobbered key, the value should be the subject in `incoming`
    clobbered = original.merge(incoming).get(Curie("FOO", "123"))
    expected = Subject(
        Curie("FOO", "123"), "different", "world", [], "dog", Curie("NCBITaxon", "12345")
    )
    assert clobbered == expected
def test_bioentities_load_from_file():
    """Load the truncated PomBase GPI fixture and check its size and one known entry."""
    pombase = collections.BioEntities.load_from_file("tests/resources/truncated-pombase.gpi")

    # Has 199 gpi lines in the file
    entity_count = len(pombase.entities)
    assert entity_count == 199

    ste4 = pombase.get(Curie.from_str("PomBase:SPAC1565.04c"))
    expected = Subject(
        Curie.from_str("PomBase:SPAC1565.04c"),
        "ste4",
        ["adaptor protein Ste4"],
        [],
        ["protein"],
        Curie.from_str("NCBITaxon:4896"),
    )
    assert ste4 == expected
def test_bioentities_file_gone():
    """Check that loading ``not_here.gpi`` gives a BioEntities with no entries.

    NOTE(review): presumably the path does not exist in the test working
    directory; the test itself does not verify that.
    """
    missing = collections.BioEntities.load_from_file("not_here.gpi")
    assert missing.entities == {}
def test_create_parser_non_version_line():
    """Check that a header line without a version declaration produces no parser."""
    parser = collections.create_parser_from_header("! hello", assocparser.AssocParserConfig())
    # Identity comparison: a parser object with a permissive ``__eq__`` must
    # not be able to pass as "no parser".
    assert parser is None
def test_create_parser_gaf():
    """Check that GAF version headers produce a GafParser carrying that version."""
    cases = (
        ("!gaf-version: 2.1", "2.1"),
        ("!gaf-version: 2.2", "2.2"),
    )
    for header, expected_version in cases:
        # A fresh config per header, as each case is independent.
        parser = collections.create_parser_from_header(header, assocparser.AssocParserConfig())
        assert isinstance(parser, gafparser.GafParser)
        assert parser.version == expected_version
def test_create_parser_gpad():
    """Check that GPAD version headers produce a GpadParser carrying that version."""
    # Note the two header spellings: ``gpa-version`` for 1.2, ``gpad-version`` for 2.0.
    cases = (
        ("!gpa-version: 1.2", "1.2"),
        ("!gpad-version: 2.0", "2.0"),
    )
    for header, expected_version in cases:
        parser = collections.create_parser_from_header(header, assocparser.AssocParserConfig())
        assert isinstance(parser, gpadparser.GpadParser)
        assert parser.version == expected_version
def test_construct_collection_empty():
    """Check that no annotation path and no GPI paths give an empty collection."""
    config = assocparser.AssocParserConfig()
    collection = collections.construct_collection(None, [], config)

    assert collection.headers == []
    assert collection.associations == collections.GoAssociations([])
    assert collection.entities == collections.BioEntities({})
def test_construct_collection_basic():
    """Build a collection from the truncated PomBase GAF and GPI fixtures and check its sizes."""
    collection = collections.construct_collection(
        "tests/resources/truncated-pombase.gaf",
        ["tests/resources/truncated-pombase.gpi"],
        assocparser.AssocParserConfig(),
    )
    # The collection is expected to report 25 header entries for truncated-pombase.gaf.
    # NOTE(review): the previous comment claimed 24 header lines while the
    # assertion checked 25; confirm the count against the fixture.
    assert len(collection.headers) == 25
    # There are many lines in this file, we should get more than 100 back
    assert len(collection.associations.associations) > 100
    # This is a 199 line gpi file, with each identifier unique, so should have 199 entity entries
    assert len(collection.entities.entities) == 199
def test_construct_collection_no_version_error():
    """Check that the ``no-version.gaf`` fixture gives no associations and a reported error."""
    collection = collections.construct_collection(
        "tests/resources/no-version.gaf",
        [],
        assocparser.AssocParserConfig(),
    )
    assert collection.associations.associations == []

    # The first message filed under gorule-0000001 carries the error type.
    report = collection.report.to_report_json()
    first_message = report["messages"]["gorule-0000001"][0]
    assert first_message["type"] == "Invalid Annotation File"
def test_bioentities_from_gpi_2_0():
    """Load the truncated MGI ``.gpi2`` fixture and check one known entity in full."""
    entities = collections.BioEntities.load_from_file("tests/resources/mgi.truncated.gpi2")

    # The lookup key is built from parts, with the identity itself containing "MGI:".
    found = entities.get(Curie(namespace="MGI", identity="MGI:1918925"))
    expected = Subject(
        id=Curie.from_str("MGI:MGI:1918925"),
        label="0610010F05Rik",
        fullname=["RIKEN cDNA 0610010F05 gene"],
        synonyms=[],
        type=[Curie.from_str("SO:0001217")],
        taxon=Curie(namespace="NCBITaxon", identity="10090"),
        db_xrefs=[Curie.from_str("UniProtKB:Q68FF0")],
    )
    assert found == expected