Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ mypy-stubs/ruamel/yaml
venv/
.cache/
.pytest_cache/

# PyCharm
.idea/
10 changes: 10 additions & 0 deletions mypy-stubs/rdflib/compare.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from typing import Dict, Union

from rdflib.graph import ConjunctiveGraph, Graph

# Type alias: a dictionary with string keys whose values are ints or strings.
Stats = Dict[str, Union[int, str]]

class IsomorphicGraph(ConjunctiveGraph):
    """Type stub for ``rdflib.compare.IsomorphicGraph``; declares no members of its own."""

# Stub signature: takes a Graph and returns an IsomorphicGraph.
# NOTE(review): the `= ...` default marks `graph` as optional for type
# checkers; rdflib's runtime function appears to require it -- confirm
# against the installed rdflib before relying on the default.
def to_isomorphic(graph: Graph = ...) -> IsomorphicGraph: ...
41 changes: 27 additions & 14 deletions schema_salad/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import os
import re
import sys
import urllib
import urllib.parse
import urllib.request
from abc import ABC, abstractmethod
from typing import List, Optional

import requests
Expand All @@ -15,40 +17,51 @@
_logger = logging.getLogger("salad")


# NOTE(review): the name `Fetcher` is bound again by the `class Fetcher(ABC)`
# definition that directly follows; this looks like the pre-change side of
# the diff -- confirm before relying on this constructor signature.
class Fetcher:
    def __init__(
        self,
        cache: CacheType,
        session: Optional[requests.sessions.Session],
    ) -> None:
        """Accept a cache and an optional requests session; both are ignored here."""
        pass
class Fetcher(ABC):
    """Fetch resources from URIs.

    Abstract interface: concrete fetchers must implement ``fetch_text``,
    ``check_exists`` and ``urljoin``.
    """

    @abstractmethod
    def fetch_text(self, url: str, content_types: Optional[List[str]] = None) -> str:
        """Retrieve the given resource as a string."""
        raise NotImplementedError()

    @abstractmethod
    def check_exists(self, url: str) -> bool:
        """Check if the given resource exists."""
        raise NotImplementedError()

    @abstractmethod
    def urljoin(self, base_url: str, url: str) -> str:
        """Combine ``base_url`` and ``url`` into a single URL string.

        The resolution rules are left to the implementing subclass.
        """
        raise NotImplementedError()

    # Class-level default; subclasses may override it to advertise other schemes.
    schemes = ["file", "http", "https", "mailto"]

    def supported_schemes(self) -> List[str]:
        """Return the list of supported URI schemes."""
        return self.schemes


class DefaultFetcher(Fetcher):
class MemoryCachingFetcher(Fetcher):
    """Base for fetchers that hold an in-memory cache.

    This class only stores the supplied cache on ``self.cache``; how it is
    used is up to the subclasses.
    """

    def __init__(self, cache: CacheType) -> None:
        """Keep a reference to ``cache`` as ``self.cache``."""
        self.cache = cache


class DefaultFetcher(MemoryCachingFetcher):
"""The default Fetcher implementation."""

def __init__(
self,
cache: CacheType,
session: Optional[requests.sessions.Session],
) -> None:
self.cache = cache
"""Create a DefaultFetcher object."""
super().__init__(cache)
self.session = session

def fetch_text(self, url: str, content_types: Optional[List[str]] = None) -> str:
"""Retrieve the given resource as a string."""
result = self.cache.get(url, None)
if isinstance(result, str):
return result
Expand Down
52 changes: 49 additions & 3 deletions schema_salad/metaschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
# The code itself is released under the Apache 2.0 license and the help text is
# subject to the license of the original schema.
import copy
import logging
import os
import pathlib
import re
import tempfile
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
import xml.sax # nosec
from abc import ABC, abstractmethod
from io import StringIO
from typing import (
Expand All @@ -21,27 +23,32 @@
Tuple,
Type,
Union,
cast,
)
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from urllib.request import pathname2url

from rdflib import Graph
from rdflib.plugins.parsers.notation3 import BadSyntax
from ruamel.yaml.comments import CommentedMap

from schema_salad.exceptions import SchemaSaladException, ValidationException
from schema_salad.fetcher import DefaultFetcher, Fetcher
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
from schema_salad.sourceline import SourceLine, add_lc_filename
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+

_vocab: Dict[str, str] = {}
_rvocab: Dict[str, str] = {}

_logger = logging.getLogger("salad")


class LoadingOptions:
def __init__(
self,
fetcher: Optional[Fetcher] = None,
namespaces: Optional[Dict[str, str]] = None,
schemas: Optional[Dict[str, str]] = None,
schemas: Optional[List[str]] = None,
fileuri: Optional[str] = None,
copyfrom: Optional["LoadingOptions"] = None,
original_doc: Optional[Any] = None,
Expand Down Expand Up @@ -77,6 +84,10 @@ def __init__(
else:
self.fetcher = fetcher

self.cache = (
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
)

self.vocab = _vocab
self.rvocab = _rvocab

Expand All @@ -87,6 +98,42 @@ def __init__(
self.vocab[k] = v
self.rvocab[v] = k

@property
def graph(self) -> Graph:
    """Generate a merged rdflib.Graph from all entries in self.schemas.

    Each schema reference is joined onto ``self.fileuri`` with the
    fetcher's ``urljoin`` (or, when ``self.fileuri`` is None, resolved as a
    local path), fetched as text and parsed as RDF/XML, falling back to
    Turtle. Per-URL graphs and the merged result are stored in
    ``self.cache``.

    Loading is best effort: a schema that cannot be fetched or parsed is
    reported with a warning and left out of the result.

    Returns:
        The merged graph; an empty graph when there are no schemas.
    """
    graph = Graph()
    if not self.schemas:
        return graph
    # The merged graph is memoised under a key derived from the schema list.
    key = str(hash(tuple(self.schemas)))
    if key in self.cache:
        return cast(Graph, self.cache[key])
    for schema in self.schemas:
        fetchurl = (
            self.fetcher.urljoin(self.fileuri, schema)
            if self.fileuri is not None
            else pathlib.Path(schema).resolve().as_uri()
        )
        try:
            cached = self.cache.get(fetchurl)
            # The cache may be shared with the fetcher, which can hold the
            # raw text (or another placeholder) under the same URL; only an
            # actual Graph counts as a cache hit.
            if not isinstance(cached, Graph):
                _logger.debug("Getting external schema %s", fetchurl)
                content = self.fetcher.fetch_text(fetchurl)
                err_msg = "unknown error"
                for fmt in ["xml", "turtle"]:
                    # Fresh graph per attempt so triples from a failed
                    # parse cannot leak into the next format's result.
                    candidate = Graph()
                    try:
                        candidate.parse(
                            data=content, format=fmt, publicID=str(fetchurl)
                        )
                    except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
                        err_msg = str(e)
                        continue
                    cached = candidate
                    break
                else:
                    # No format matched: warn instead of caching an empty graph.
                    _logger.warning(
                        "Could not load extension schema %s: %s", fetchurl, err_msg
                    )
                    continue
                self.cache[fetchurl] = cached
            graph += cached
        except Exception as e:
            # Deliberate best effort: one broken schema must not abort the rest.
            _logger.warning(
                "Could not load extension schema %s: %s", fetchurl, str(e)
            )
    self.cache[key] = graph
    return graph


class Savable(ABC):
"""Mark classes than have a save() and fromDoc() function."""
Expand Down Expand Up @@ -138,7 +185,6 @@ def save(
base_url: str = "",
relative_uris: bool = True,
) -> save_type:

if isinstance(val, Savable):
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
if isinstance(val, MutableSequence):
Expand Down
52 changes: 49 additions & 3 deletions schema_salad/python_codegen_support.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Template code used by python_codegen.py."""
import copy
import logging
import os
import pathlib
import re
import tempfile
import uuid as _uuid__ # pylint: disable=unused-import # noqa: F401
import xml.sax # nosec
from abc import ABC, abstractmethod
from io import StringIO
from typing import (
Expand All @@ -18,27 +20,32 @@
Tuple,
Type,
Union,
cast,
)
from urllib.parse import quote, urlparse, urlsplit, urlunsplit
from urllib.request import pathname2url

from rdflib import Graph
from rdflib.plugins.parsers.notation3 import BadSyntax
from ruamel.yaml.comments import CommentedMap

from schema_salad.exceptions import SchemaSaladException, ValidationException
from schema_salad.fetcher import DefaultFetcher, Fetcher
from schema_salad.fetcher import DefaultFetcher, Fetcher, MemoryCachingFetcher
from schema_salad.sourceline import SourceLine, add_lc_filename
from schema_salad.utils import yaml_no_ts # requires schema-salad v8.2+

_vocab: Dict[str, str] = {}
_rvocab: Dict[str, str] = {}

_logger = logging.getLogger("salad")


class LoadingOptions:
def __init__(
self,
fetcher: Optional[Fetcher] = None,
namespaces: Optional[Dict[str, str]] = None,
schemas: Optional[Dict[str, str]] = None,
schemas: Optional[List[str]] = None,
fileuri: Optional[str] = None,
copyfrom: Optional["LoadingOptions"] = None,
original_doc: Optional[Any] = None,
Expand Down Expand Up @@ -74,6 +81,10 @@ def __init__(
else:
self.fetcher = fetcher

self.cache = (
self.fetcher.cache if isinstance(self.fetcher, MemoryCachingFetcher) else {}
)

self.vocab = _vocab
self.rvocab = _rvocab

Expand All @@ -84,6 +95,42 @@ def __init__(
self.vocab[k] = v
self.rvocab[v] = k

@property
def graph(self) -> Graph:
    """Generate a merged rdflib.Graph from all entries in self.schemas.

    Each schema reference is joined onto ``self.fileuri`` with the
    fetcher's ``urljoin`` (or, when ``self.fileuri`` is None, resolved as a
    local path), fetched as text and parsed as RDF/XML, falling back to
    Turtle. Per-URL graphs and the merged result are stored in
    ``self.cache``.

    Loading is best effort: a schema that cannot be fetched or parsed is
    reported with a warning and left out of the result.

    Returns:
        The merged graph; an empty graph when there are no schemas.
    """
    graph = Graph()
    if not self.schemas:
        return graph
    # The merged graph is memoised under a key derived from the schema list.
    key = str(hash(tuple(self.schemas)))
    if key in self.cache:
        return cast(Graph, self.cache[key])
    for schema in self.schemas:
        fetchurl = (
            self.fetcher.urljoin(self.fileuri, schema)
            if self.fileuri is not None
            else pathlib.Path(schema).resolve().as_uri()
        )
        try:
            cached = self.cache.get(fetchurl)
            # The cache may be shared with the fetcher, which can hold the
            # raw text (or another placeholder) under the same URL; only an
            # actual Graph counts as a cache hit.
            if not isinstance(cached, Graph):
                _logger.debug("Getting external schema %s", fetchurl)
                content = self.fetcher.fetch_text(fetchurl)
                err_msg = "unknown error"
                for fmt in ["xml", "turtle"]:
                    # Fresh graph per attempt so triples from a failed
                    # parse cannot leak into the next format's result.
                    candidate = Graph()
                    try:
                        candidate.parse(
                            data=content, format=fmt, publicID=str(fetchurl)
                        )
                    except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
                        err_msg = str(e)
                        continue
                    cached = candidate
                    break
                else:
                    # No format matched: warn instead of caching an empty graph.
                    _logger.warning(
                        "Could not load extension schema %s: %s", fetchurl, err_msg
                    )
                    continue
                self.cache[fetchurl] = cached
            graph += cached
        except Exception as e:
            # Deliberate best effort: one broken schema must not abort the rest.
            _logger.warning(
                "Could not load extension schema %s: %s", fetchurl, str(e)
            )
    self.cache[key] = graph
    return graph


class Savable(ABC):
"""Mark classes than have a save() and fromDoc() function."""
Expand Down Expand Up @@ -135,7 +182,6 @@ def save(
base_url: str = "",
relative_uris: bool = True,
) -> save_type:

if isinstance(val, Savable):
return val.save(top=top, base_url=base_url, relative_uris=relative_uris)
if isinstance(val, MutableSequence):
Expand Down
37 changes: 35 additions & 2 deletions schema_salad/tests/test_python_codegen.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import inspect
import os
import pathlib
from pathlib import Path
from typing import Any, Dict, List, Optional, cast

from rdflib import Graph
from rdflib.compare import to_isomorphic
from requests import Session

import schema_salad.metaschema as cg_metaschema
from schema_salad import codegen
from schema_salad.avro.schema import Names
from schema_salad.fetcher import DefaultFetcher
from schema_salad.python_codegen_support import LoadingOptions
from schema_salad.schema import load_schema

from .util import basket_file_uri, cwl_file_uri, metaschema_file_uri
from .util import basket_file_uri, cwl_file_uri, get_data, metaschema_file_uri


def test_cwl_gen(tmp_path: Path) -> None:
Expand Down Expand Up @@ -90,3 +96,30 @@ def test_use_of_package_for_parser_info(tmp_path: Path) -> None:
assert os.path.exists(src_target)
with open(src_target) as f:
assert 'def parser_info() -> str:\n return "cwl"' in f.read()


def test_graph_property() -> None:
    """Check that `LoadingOptions.graph` matches the `$schemas` file parsed directly."""
    schema_path = cast(str, get_data("tests/EDAM.owl"))
    schema_uri = pathlib.Path(schema_path).resolve().as_uri()

    # Reference graph: fetch the file ourselves and parse it as RDF/XML.
    raw_text = DefaultFetcher({}, Session()).fetch_text(schema_uri)
    expected = Graph()
    expected.parse(data=raw_text, format="xml", publicID=schema_uri)

    options = LoadingOptions(schemas=[schema_path])
    assert to_isomorphic(expected) == to_isomorphic(options.graph)


def test_graph_property_cache() -> None:
    """Check that reading `LoadingOptions.graph` twice yields equal graphs."""
    options = LoadingOptions(schemas=[cast(str, get_data("tests/EDAM.owl"))])
    # Evaluated left to right: the second read happens after the first.
    first_read, second_read = options.graph, options.graph
    assert first_read == second_read


def test_graph_property_empty_schema() -> None:
    """Test that an empty RDFLib Graph is returned when no `$schemas` directive is present."""
    actual = to_isomorphic(LoadingOptions().graph)
    expected = to_isomorphic(Graph())
    assert actual == expected