Skip to content

Commit

Permalink
refactor: use iri util consistently (DEV-2951) (#642)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnussbaum committed Nov 15, 2023
1 parent 406a845 commit 911cfa5
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 15 deletions.
Expand Up @@ -7,6 +7,7 @@
from lxml import etree

from dsp_tools.commands.xmlupload.stash.graph_models import Cost, Edge, ResptrLink, XMLLink
from dsp_tools.utils.iri_util import is_resource_iri


def create_info_from_xml_for_graph(
Expand Down Expand Up @@ -54,7 +55,7 @@ def _create_resptr_link_objects(subject_id: str, resptr_prop: etree._Element) ->
resptr_links = []
for resptr in resptr_prop.getchildren():
resptr.text = cast(str, resptr.text)
if not regex.search(r"https?://rdfh.ch/[a-fA-F0-9]{4}/[\w-]{22}", resptr.text):
if not is_resource_iri(resptr.text):
link_object = ResptrLink(subject_id, resptr.text)
# this UUID is so that the links that were stashed can be identified in the XML data file
resptr.attrib["linkUUID"] = link_object.link_uuid
Expand Down
20 changes: 6 additions & 14 deletions src/dsp_tools/models/helpers.py
Expand Up @@ -9,6 +9,7 @@
import regex

from dsp_tools.models.exceptions import BaseError
from dsp_tools.utils.iri_util import is_iri

#
# here we do some data typing that should help
Expand All @@ -32,15 +33,6 @@ class OntoIri:
ContextType = dict[str, OntoIri]


class IriTest:  # pylint: disable=too-few-public-methods
    """Check whether a string is, in its entirety, an http(s) IRI."""

    # Scheme, optional host, optional port (up to six digits), path segments, optional fragment.
    __iri_regexp = regex.compile("^(http)s?://([\\w\\.\\-~]+)?(:\\d{,6})?(/[\\w\\-~]+)*(#[\\w\\-~]*)?")

    @classmethod
    def test(cls, val: str) -> bool:
        """
        Tell whether the IRI pattern covers the whole of the given string.

        :param val: The string to check
        :return: True if the pattern matches from the first to the last character of val, False otherwise
        """
        found = cls.__iri_regexp.match(val)
        if found is None:
            return False
        # The pattern has no end anchor, so a match that stops early means trailing garbage.
        return found.end() == len(val)


@unique
class Actions(Enum):
Create = 1
Expand Down Expand Up @@ -225,7 +217,7 @@ def iri_from_prefix(self, prefix: str) -> Optional[str]:
:return: The full IRI without trailing "#"
"""
# if self.__is_iri(prefix):
if IriTest.test(prefix):
if is_iri(prefix):
return prefix
if self._context.get(prefix) is not None:
return self._context.get(prefix).iri
Expand All @@ -243,7 +235,7 @@ def prefix_from_iri(self, iri: str) -> Optional[str]:
:return: the prefix of this context element, or None, if not found
"""
# if not self.__is_iri(iri):
if not IriTest.test(iri):
if not is_iri(iri):
raise BaseError("String does not conform to IRI patter: " + iri)
if iri.endswith("#"):
iri = iri[:-1]
Expand Down Expand Up @@ -278,7 +270,7 @@ def get_qualified_iri(self, val: Optional[str]) -> Optional[str]:
"""
if not val:
return None
if IriTest.test(val):
if is_iri(val):
return val
tmp = val.split(":")
if len(tmp) < 2:
Expand Down Expand Up @@ -313,7 +305,7 @@ def get_prefixed_iri(self, iri: Optional[str]) -> Optional[str]:
if m and m.span()[1] == len(iri):
return iri

if not IriTest.test(iri):
if not is_iri(iri):
raise BaseError(f"The IRI '{iri}' does not conform to the IRI pattern.")

split_point = iri.find("#")
Expand Down Expand Up @@ -356,7 +348,7 @@ def reduce_iri(self, iri_str: str, onto_name: Optional[str] = None) -> str:
knora_api = self.prefix_from_iri("http://api.knora.org/ontology/knora-api/v2#")
salsah_gui = self.prefix_from_iri("http://api.knora.org/ontology/salsah-gui/v2#")

if IriTest.test(iri_str):
if is_iri(iri_str):
if self.get_prefixed_iri(iri_str):
iri_str = self.get_prefixed_iri(iri_str)
tmp = iri_str.split(":")
Expand Down
6 changes: 6 additions & 0 deletions src/dsp_tools/utils/iri_util.py
@@ -1,8 +1,14 @@
import regex

# Generic http(s) IRI: optional host, optional port of up to six digits,
# any number of path segments, and an optional fragment.
_iri_pattern = r"^(http)s?://[\w\.\-~]*(:\d{,6})?(/[\w\-~]+)*(#[\w\-~]*)?"
# Resource IRI on the host rdfh.ch: a four-character hexadecimal segment,
# followed by a segment of exactly 22 word characters or hyphens.
# The dot is escaped so that only the literal host "rdfh.ch" is accepted.
_resource_iri_pattern = r"https?://rdfh\.ch/[a-fA-F0-9]{4}/[\w-]{22}"


def is_iri(s: str) -> bool:
    """
    Checks whether a string is a valid IRI.

    The whole string must match the generic http(s) IRI pattern;
    a string that merely contains an IRI is rejected.

    Args:
        s: the string to check

    Returns:
        True if the entire string is an IRI, False otherwise
    """
    return regex.fullmatch(_iri_pattern, s) is not None


def is_resource_iri(s: str) -> bool:
    """
    Checks whether a string is a valid resource IRI.

    The whole string must have the form "http(s)://rdfh.ch/<4 hex characters>/<22 word characters or hyphens>".

    Args:
        s: the string to check

    Returns:
        True if the entire string is a resource IRI, False otherwise
    """
    return regex.fullmatch(_resource_iri_pattern, s) is not None

0 comments on commit 911cfa5

Please sign in to comment.