Skip to content

Commit b9bc69a

Browse files
GlassOfWhiskey and mr-c authored
pycodegen: Added graph property to LoadingOptions (#583)
This property loads schemas in a Graph object. This feature can be used for CWL file format checking.

* Removed rdfa format from graph loader

  The `rdfa` format has been removed from RDFLib 5.0.0 onward (see https://rdflib.readthedocs.io/en/stable/upgrade4to5.html#removed-rdf-parsers)

* Adjusted fetcher hierarchy

* Changed schema parameter type

Co-authored-by: Michael R. Crusoe <1330696+mr-c@users.noreply.github.com>
1 parent 2de4281 commit b9bc69a

File tree

6 files changed

+173
-22
lines changed

6 files changed

+173
-22
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@ mypy-stubs/ruamel/yaml
1010
venv/
1111
.cache/
1212
.pytest_cache/
13+
14+
# PyCharm
15+
.idea/

mypy-stubs/rdflib/compare.pyi

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from typing import Dict, Union
2+
3+
from rdflib.graph import ConjunctiveGraph, Graph
4+
5+
Stats = Dict[str, Union[int, str]]
6+
7+
class IsomorphicGraph(ConjunctiveGraph):
8+
pass
9+
10+
def to_isomorphic(graph: Graph = ...) -> IsomorphicGraph: ...

schema_salad/fetcher.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import os
44
import re
55
import sys
6-
import urllib
6+
import urllib.parse
7+
import urllib.request
8+
from abc import ABC, abstractmethod
79
from typing import List, Optional
810

911
import requests
@@ -15,40 +17,51 @@
1517
_logger = logging.getLogger("salad")
1618

1719

18-
class Fetcher:
19-
def __init__(
20-
self,
21-
cache: CacheType,
22-
session: Optional[requests.sessions.Session],
23-
) -> None:
24-
pass
20+
class Fetcher(ABC):
21+
"""Fetch resources from URIs."""
2522

23+
@abstractmethod
2624
def fetch_text(self, url: str, content_types: Optional[List[str]] = None) -> str:
27-
raise NotImplementedError()
25+
"""Retrieve the given resource as a string."""
26+
...
2827

28+
@abstractmethod
2929
def check_exists(self, url: str) -> bool:
30-
raise NotImplementedError()
30+
"""Check if the given resource exists."""
31+
...
3132

33+
@abstractmethod
3234
def urljoin(self, base_url: str, url: str) -> str:
33-
raise NotImplementedError()
35+
...
3436

3537
schemes = ["file", "http", "https", "mailto"]
3638

3739
def supported_schemes(self) -> List[str]:
40+
"""Return the list of supported URI schemes."""
3841
return self.schemes
3942

4043

41-
class DefaultFetcher(Fetcher):
44+
class MemoryCachingFetcher(Fetcher):
45+
"""Fetcher that caches resources in memory after retrieval."""
46+
47+
def __init__(self, cache: CacheType) -> None:
48+
"""Create a MemoryCachingFetcher object."""
49+
self.cache = cache
50+
51+
52+
class DefaultFetcher(MemoryCachingFetcher):
53+
"""The default Fetcher implementation."""
54+
4255
def __init__(
4356
self,
4457
cache: CacheType,
4558
session: Optional[requests.sessions.Session],
4659
) -> None:
47-
self.cache = cache
60+
"""Create a DefaultFetcher object."""
61+
super().__init__(cache)
4862
self.session = session
4963

5064
def fetch_text(self, url: str, content_types: Optional[List[str]] = None) -> str:
51-
"""Retrieve the given resource as a string."""
5265
result = self.cache.get(url, None)
5366
if isinstance(result, str):
5467
return result

schema_salad/metaschema.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
# The code itself is released under the Apache 2.0 license and the help text is
44
# subject to the license of the original schema.
55
import copy
6+
import logging
67
import os
78
import pathlib
89
import re
910
import tempfile
1011
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
12+
import xml.sax # nosec
1113
from abc import ABC, abstractmethod
1214
from io import StringIO
1315
from typing import (
@@ -21,27 +23,32 @@
2123
Tuple,
2224
Type,
2325
Union,
26+
cast,
2427
)
2528
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2629
from urllib.request import pathname2url
2730

31+
from rdflib import Graph
32+
from rdflib.plugins.parsers.notation3 import BadSyntax
2833
from ruamel.yaml.comments import CommentedMap
2934

3035
from schema_salad.exceptions import SchemaSaladException, ValidationException
31-
from schema_salad.fetcher import DefaultFetcher, Fetcher
36+
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
3237
from schema_salad.sourceline import SourceLine, add_lc_filename
3338
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+
3439

3540
_vocab: Dict[str, str] = {}
3641
_rvocab: Dict[str, str] = {}
3742

43+
_logger = logging.getLogger("salad")
44+
3845

3946
class LoadingOptions:
4047
def __init__(
4148
self,
4249
fetcher: Optional[Fetcher] = None,
4350
namespaces: Optional[Dict[str, str]] = None,
44-
schemas: Optional[Dict[str, str]] = None,
51+
schemas: Optional[List[str]] = None,
4552
fileuri: Optional[str] = None,
4653
copyfrom: Optional["LoadingOptions"] = None,
4754
original_doc: Optional[Any] = None,
@@ -77,6 +84,10 @@ def __init__(
7784
else:
7885
self.fetcher = fetcher
7986

87+
self.cache = (
88+
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
89+
)
90+
8091
self.vocab = _vocab
8192
self.rvocab = _rvocab
8293

@@ -87,6 +98,42 @@ def __init__(
8798
self.vocab[k] = v
8899
self.rvocab[v] = k
89100

101+
@property
102+
def graph(self) -> Graph:
103+
"""Generate a merged rdflib.Graph from all entries in self.schemas."""
104+
graph = Graph()
105+
if not self.schemas:
106+
return graph
107+
key = str(hash(tuple(self.schemas)))
108+
if key in self.cache:
109+
return cast(Graph, self.cache[key])
110+
for schema in self.schemas:
111+
fetchurl = (
112+
self.fetcher.urljoin(self.fileuri, schema)
113+
if self.fileuri is not None
114+
else pathlib.Path(schema).resolve().as_uri()
115+
)
116+
try:
117+
if fetchurl not in self.cache or self.cache[fetchurl] is True:
118+
_logger.debug("Getting external schema %s", fetchurl)
119+
content = self.fetcher.fetch_text(fetchurl)
120+
self.cache[fetchurl] = newGraph = Graph()
121+
for fmt in ["xml", "turtle"]:
122+
try:
123+
newGraph.parse(
124+
data=content, format=fmt, publicID=str(fetchurl)
125+
)
126+
break
127+
except (xml.sax.SAXParseException, TypeError, BadSyntax):
128+
pass
129+
graph += self.cache[fetchurl]
130+
except Exception as e:
131+
_logger.warning(
132+
"Could not load extension schema %s: %s", fetchurl, str(e)
133+
)
134+
self.cache[key] = graph
135+
return graph
136+
90137

91138
class Savable(ABC):
92139
"""Mark classes that have a save() and fromDoc() function."""
@@ -138,7 +185,6 @@ def save(
138185
base_url: str = "",
139186
relative_uris: bool = True,
140187
) -> save_type:
141-
142188
if isinstance(val, Savable):
143189
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
144190
if isinstance(val, MutableSequence):

schema_salad/python_codegen_support.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Template code used by python_codegen.py."""
22
import copy
3+
import logging
34
import os
45
import pathlib
56
import re
67
import tempfile
78
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
9+
import xml.sax # nosec
810
from abc import ABC, abstractmethod
911
from io import StringIO
1012
from typing import (
@@ -18,27 +20,32 @@
1820
Tuple,
1921
Type,
2022
Union,
23+
cast,
2124
)
2225
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
2326
from urllib.request import pathname2url
2427

28+
from rdflib import Graph
29+
from rdflib.plugins.parsers.notation3 import BadSyntax
2530
from ruamel.yaml.comments import CommentedMap
2631

2732
from schema_salad.exceptions import SchemaSaladException, ValidationException
28-
from schema_salad.fetcher import DefaultFetcher, Fetcher
33+
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
2934
from schema_salad.sourceline import SourceLine, add_lc_filename
3035
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+
3136

3237
_vocab: Dict[str, str] = {}
3338
_rvocab: Dict[str, str] = {}
3439

40+
_logger = logging.getLogger("salad")
41+
3542

3643
class LoadingOptions:
3744
def __init__(
3845
self,
3946
fetcher: Optional[Fetcher] = None,
4047
namespaces: Optional[Dict[str, str]] = None,
41-
schemas: Optional[Dict[str, str]] = None,
48+
schemas: Optional[List[str]] = None,
4249
fileuri: Optional[str] = None,
4350
copyfrom: Optional["LoadingOptions"] = None,
4451
original_doc: Optional[Any] = None,
@@ -74,6 +81,10 @@ def __init__(
7481
else:
7582
self.fetcher = fetcher
7683

84+
self.cache = (
85+
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
86+
)
87+
7788
self.vocab = _vocab
7889
self.rvocab = _rvocab
7990

@@ -84,6 +95,42 @@ def __init__(
8495
self.vocab[k] = v
8596
self.rvocab[v] = k
8697

98+
@property
99+
def graph(self) -> Graph:
100+
"""Generate a merged rdflib.Graph from all entries in self.schemas."""
101+
graph = Graph()
102+
if not self.schemas:
103+
return graph
104+
key = str(hash(tuple(self.schemas)))
105+
if key in self.cache:
106+
return cast(Graph, self.cache[key])
107+
for schema in self.schemas:
108+
fetchurl = (
109+
self.fetcher.urljoin(self.fileuri, schema)
110+
if self.fileuri is not None
111+
else pathlib.Path(schema).resolve().as_uri()
112+
)
113+
try:
114+
if fetchurl not in self.cache or self.cache[fetchurl] is True:
115+
_logger.debug("Getting external schema %s", fetchurl)
116+
content = self.fetcher.fetch_text(fetchurl)
117+
self.cache[fetchurl] = newGraph = Graph()
118+
for fmt in ["xml", "turtle"]:
119+
try:
120+
newGraph.parse(
121+
data=content, format=fmt, publicID=str(fetchurl)
122+
)
123+
break
124+
except (xml.sax.SAXParseException, TypeError, BadSyntax):
125+
pass
126+
graph += self.cache[fetchurl]
127+
except Exception as e:
128+
_logger.warning(
129+
"Could not load extension schema %s: %s", fetchurl, str(e)
130+
)
131+
self.cache[key] = graph
132+
return graph
133+
87134

88135
class Savable(ABC):
89136
"""Mark classes that have a save() and fromDoc() function."""
@@ -135,7 +182,6 @@ def save(
135182
base_url: str = "",
136183
relative_uris: bool = True,
137184
) -> save_type:
138-
139185
if isinstance(val, Savable):
140186
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
141187
if isinstance(val, MutableSequence):

schema_salad/tests/test_python_codegen.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import inspect
22
import os
3+
import pathlib
34
from pathlib import Path
45
from typing import Any, Dict, List, Optional, cast
56

7+
from rdflib import Graph
8+
from rdflib.compare import to_isomorphic
9+
from requests import Session
10+
611
import schema_salad.metaschema as cg_metaschema
712
from schema_salad import codegen
813
from schema_salad.avro.schema import Names
14+
from schema_salad.fetcher import DefaultFetcher
15+
from schema_salad.python_codegen_support import LoadingOptions
916
from schema_salad.schema import load_schema
10-
11-
from .util import basket_file_uri, cwl_file_uri, metaschema_file_uri
17+
from .util import basket_file_uri, cwl_file_uri, get_data, metaschema_file_uri
1218

1319

1420
def test_cwl_gen(tmp_path: Path) -> None:
@@ -90,3 +96,30 @@ def test_use_of_package_for_parser_info(tmp_path: Path) -> None:
9096
assert os.path.exists(src_target)
9197
with open(src_target) as f:
9298
assert 'def parser_info() -> str:\n return "cwl"' in f.read()
99+
100+
101+
def test_graph_property() -> None:
102+
"""Test the RDFLib Graph representation of the `$schemas` directive."""
103+
schema = cast(str, get_data("tests/EDAM.owl"))
104+
fetcher = DefaultFetcher({}, Session())
105+
fetchurl = pathlib.Path(schema).resolve().as_uri()
106+
content = fetcher.fetch_text(fetchurl)
107+
graph = Graph()
108+
graph.parse(data=content, format="xml", publicID=fetchurl)
109+
loading_options = LoadingOptions(schemas=[schema])
110+
assert to_isomorphic(graph) == to_isomorphic(loading_options.graph)
111+
112+
113+
def test_graph_property_cache() -> None:
114+
"""Test that LoadingOptions properly caches the `$schemas` RDFLib Graph representations."""
115+
schema = cast(str, get_data("tests/EDAM.owl"))
116+
loading_options = LoadingOptions(schemas=[schema])
117+
graph1 = loading_options.graph
118+
graph2 = loading_options.graph
119+
assert graph1 == graph2
120+
121+
122+
def test_graph_property_empty_schema() -> None:
123+
"""Test that an empty RDFLib Graph is returned when no `$schemas` directive is present."""
124+
loading_options = LoadingOptions()
125+
assert to_isomorphic(loading_options.graph) == to_isomorphic(Graph())

0 commit comments

Comments
 (0)