-
Notifications
You must be signed in to change notification settings - Fork 30
/
ontol_factory.py
178 lines (156 loc) · 6.2 KB
/
ontol_factory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""
Factory class for generating ontology objects based on a variety of handle types.
See :ref:`inputs` on readthedocs for more details
"""
import ontobio.obograph_util as obograph_util
from ontobio.ontol import Ontology
from ontobio.sparql.sparql_ontology import EagerRemoteSparqlOntology
import os
import subprocess
import hashlib
import logging
logger = logging.getLogger(__name__)
# Handle loaded by `OntologyFactory.create` when the caller supplies none.
# TODO: decide what should happen when this file does not exist (a previous,
# commented-out draft reset the handle to None in that case).
default_ontology_handle = 'cache/ontologies/pato.json'

# Module-wide cache for the default ontology. It starts empty and is filled in
# by `OntologyFactory.create` the first time the default is requested.
# (No `global` statement is needed here: names assigned at module level are
# already global.)
default_ontology = None
class OntologyFactory():
    """Implements a factory for generating :class:`Ontology` objects.

    You should use a factory object rather than initializing
    `Ontology` directly. See :ref:`inputs` for more details.
    """

    # Counts requests for the default ontology. It starts out as a class
    # attribute, but `create` assigns through `self`, so after the first
    # increment each factory instance carries its own count.
    test = 0

    def __init__(self, handle=None):
        """
        initializes based on an ontology name

        Arguments
        ---------
        handle : str
            handle that `create` uses when it is called without one;
            see `create` for the accepted forms
        """
        self.handle = handle

    def create(self, handle=None, handle_type=None, **args):
        """
        Creates an ontology based on a handle

        Handle is one of the following

        - `FILENAME.json` : creates an ontology from an obographs json file
        - `obo:ONTID`     : E.g. obo:pato - creates an ontology from obolibrary PURL (requires owltools)
        - `ONTID`         : E.g. 'pato' - creates an ontology from a remote SPARQL query

        Arguments
        ---------
        handle : str
            specifies how to retrieve the ontology info. When omitted, the
            handle given to the constructor is used; when that is also
            None, the shared default ontology is returned.
        handle_type : str
            accepted for interface compatibility; not used by this method
        args : dict
            extra keyword arguments forwarded to `create_ontology`

        Returns
        -------
        the ontology produced by `create_ontology`, or the cached
        module-level default ontology when no handle is available
        """
        global default_ontology

        if handle is None:
            # The constructor's handle used to be stored but never read;
            # honour it before falling back to the shared default.
            handle = self.handle

        if handle is not None:
            return create_ontology(handle, **args)

        self.test = self.test + 1
        logger.info("T: " + str(self.test))
        if default_ontology is None:
            # Built once and then shared by every factory in the process.
            logger.info("Creating new instance of default ontology")
            default_ontology = create_ontology(default_ontology_handle, **args)
        logger.info("Using default_ontology")
        return default_ontology
def create_ontology(handle=None, **args):
    """
    Creates an ontology object from a handle, picking the loading strategy
    from the form of the handle.

    The strategies are tried in this order:

    - `A+B+...`      : every `+`-separated handle is loaded on its own and the results are merged
    - existing file  : a path that contains a `.` (not as first character) and exists on disk
    - `obo:ONTID`    : converted from the obolibrary PURL with owltools, cached under /tmp
    - `wdq:...`      : loaded through `EagerWikidataOntology`
    - `skos:FILE`    : loaded through `Skos.process_file`
    - `scigraph:...` : loaded through `RemoteScigraphOntology`
    - `http:...`     : converted from the URL with owltools, cached under /tmp
    - anything else  : loaded through `EagerRemoteSparqlOntology`

    Arguments
    ---------
    handle : str
        specifies how to retrieve the ontology info. Must be a string;
        the `None` default is not usable because string methods are
        called on it straight away.
    args : dict
        extra keyword arguments; only forwarded when loading from a local file

    Returns
    -------
    the ontology object built by the selected strategy

    Raises
    ------
    subprocess.CalledProcessError
        if an owltools conversion exits with a non-zero status
    """
    ont = None
    logger.info("Determining strategy to load '{}' into memory...".format(handle))

    if "+" in handle:
        # Composite handle: load each part, then merge the others into the last one.
        parts = [create_ontology(part) for part in handle.split("+")]
        ont = parts.pop()
        ont.merge(parts)
        return ont

    # TODO: consider replacing with plugin architecture
    if handle.find(".") > 0 and os.path.isfile(handle):
        logger.info("Fetching obograph-json file from filesystem")
        ont = translate_file_to_ontology(handle, **args)
    elif handle.startswith("obo:"):
        logger.info("Fetching from OBO PURL")
        if handle.find(".") == -1:
            # Compare the bare ontology ID. The handle itself still carries
            # the "obo:" prefix, so comparing it directly never matched and
            # the large ontologies were always fetched as OWL.
            ont_id = handle[len("obo:"):]
            if ont_id in ('chebi', 'ncbitaxon', 'pr'):
                handle += '.obo'
                logger.info("using obo for large ontology: {}".format(handle))
            else:
                handle += '.owl'
        fn = '/tmp/'+handle
        if not os.path.isfile(fn):
            url = handle.replace("obo:", "http://purl.obolibrary.org/obo/")
            cmd = ['owltools', url, '-o', '-f', 'json', fn]
            cp = subprocess.run(cmd, check=True)
            logger.info(cp)
        else:
            logger.info("using cached file: "+fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    elif handle.startswith("wdq:"):
        from ontobio.sparql.wikidata_ontology import EagerWikidataOntology
        logger.info("Fetching from Wikidata")
        ont = EagerWikidataOntology(handle=handle)
    elif handle.startswith("skos:"):
        fn = handle.replace('skos:', '')
        from ontobio.sparql.skos import Skos
        logger.info("Fetching from Skos file")
        skos = Skos()
        ont = skos.process_file(fn)
    elif handle.startswith("scigraph:"):
        from ontobio.neo.scigraph_ontology import RemoteScigraphOntology
        logger.info("Fetching from SciGraph")
        ont = RemoteScigraphOntology(handle=handle)
    elif handle.startswith("http:"):
        logger.info("Fetching from Web PURL: "+handle)
        # The URL is hashed so that it can serve as a flat cache file name.
        encoded = hashlib.sha256(handle.encode()).hexdigest()
        logger.info(" encoded: "+str(encoded))
        fn = '/tmp/'+encoded
        if not os.path.isfile(fn):
            cmd = ['owltools', handle, '-o', '-f', 'json', fn]
            cp = subprocess.run(cmd, check=True)
            logger.info(cp)
        else:
            logger.info("using cached file: "+fn)
        g = obograph_util.convert_json_file(fn)
        ont = Ontology(handle=handle, payload=g)
    else:
        logger.info("Fetching from SPARQL")
        ont = EagerRemoteSparqlOntology(handle=handle)
    return ont
def create_ontology_from_obograph(og):
    """
    Wraps an already-loaded obograph in an ontology object.

    `og` is converted with `obograph_util.convert_json_object` and the
    result becomes the payload of an `Ontology` whose handle is None.
    """
    return Ontology(handle=None, payload=obograph_util.convert_json_object(og))
def translate_file_to_ontology(handle, **args):
    """
    Loads an ontology from a local file, dispatching on the file suffix.

    - `.json` : read directly with `obograph_util.convert_json_file`
    - `.ttl`  : converted with `RdfMapper`
    - other   : converted to obograph json with owltools; the converted
      file is stored under /tmp, named by the SHA256 of the input's
      contents, and reused on later calls

    Arguments
    ---------
    handle : str
        path of the file to load
    args : dict
        forwarded to `convert_json_file`, or to the `RdfMapper`
        constructor for `.ttl` input
    """
    if handle.endswith(".json"):
        graph = obograph_util.convert_json_file(handle, **args)
        return Ontology(handle=handle, payload=graph)

    if handle.endswith(".ttl"):
        from ontobio.sparql.rdf2nx import RdfMapper
        logger.info("RdfMapper: {}".format(args))
        mapper = RdfMapper(**args)
        return mapper.convert(handle, 'ttl')

    # Everything else goes through owltools; only warn for unexpected suffixes.
    if not handle.endswith(".obo") and not handle.endswith(".owl"):
        logger.info("Attempting to parse non obo or owl file with owltools: "+handle)

    digest = get_checksum(handle)
    logger.info(" encoded: "+str(digest))
    cache_path = '/tmp/'+digest

    if os.path.isfile(cache_path):
        logger.info("using cached file: "+cache_path)
    else:
        completed = subprocess.run(
            ['owltools', handle, '-o', '-f', 'json', cache_path],
            check=True,
        )
        logger.info(completed)

    graph = obograph_util.convert_json_file(cache_path, **args)
    return Ontology(handle=handle, payload=graph)
def get_checksum(file):
    """
    Get SHA256 hash from the contents of a given file

    The file is hashed in fixed-size chunks so that large files are not
    read into memory in one piece.

    Arguments
    ---------
    file : str
        path of the file to hash

    Returns
    -------
    str
        hexadecimal SHA256 digest of the file contents
    """
    digest = hashlib.sha256()
    with open(file, 'rb') as fh:
        # iter() with a sentinel keeps reading until read() returns b"" at EOF.
        for chunk in iter(lambda: fh.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()