diff --git a/src/dsp_tools/models/value.py b/src/dsp_tools/models/value.py index 96fe1acfa..f6d3509fe 100644 --- a/src/dsp_tools/models/value.py +++ b/src/dsp_tools/models/value.py @@ -15,7 +15,6 @@ class KnoraStandoffXml: """Used to handle XML strings for standoff markup""" - __iriregexp = regex.compile(r"IRI:[^:]*:IRI") __xmlstr: str def __init__(self, xmlstr: str) -> None: @@ -24,9 +23,6 @@ def __init__(self, xmlstr: str) -> None: def __str__(self) -> str: return self.__xmlstr - def get_all_iris(self) -> Optional[list[str]]: - return self.__iriregexp.findall(self.__xmlstr) - def find_ids_referenced_in_salsah_links(self) -> set[str]: return set(regex.findall(pattern='href="IRI:(.*?):IRI"', string=self.__xmlstr)) diff --git a/src/dsp_tools/models/xmlresource.py b/src/dsp_tools/models/xmlresource.py index 6582e9900..3f6b894bc 100644 --- a/src/dsp_tools/models/xmlresource.py +++ b/src/dsp_tools/models/xmlresource.py @@ -146,16 +146,15 @@ def get_propvals( v = value.value # if we do not find the id, we assume it's a valid DSP IRI elif prop.valtype == "text": if isinstance(value.value, KnoraStandoffXml): - iri_refs = value.value.get_all_iris() - for iri_ref in iri_refs or []: - res_id = iri_ref.split(":")[1] + res_ids = value.value.find_ids_referenced_in_salsah_links() + for res_id in res_ids: iri = resiri_lookup.get(res_id) if not iri: raise BaseError( f"Resource '{self.id}' cannot be created, because it contains a salsah-Link to " f"the following invalid resource: '{res_id}'" ) - value.value.replace(iri_ref, iri) + value.value.replace(f"IRI:{res_id}:IRI", iri) v = value.value else: v = value.value diff --git a/src/dsp_tools/models/xmlvalue.py b/src/dsp_tools/models/xmlvalue.py index 39ad1e492..eab8ed5ec 100644 --- a/src/dsp_tools/models/xmlvalue.py +++ b/src/dsp_tools/models/xmlvalue.py @@ -27,7 +27,7 @@ def __init__( xmlstr_orig = etree.tostring(node, encoding="unicode", method="xml") xmlstr_cleaned = self._cleanup_formatted_text(xmlstr_orig) self.value = KnoraStandoffXml(xmlstr_cleaned) - self.resrefs = list({x.split(":")[1] for x in self.value.get_all_iris() or []}) + self.resrefs = list(self.value.find_ids_referenced_in_salsah_links()) elif val_type == "text" and node.get("encoding") == "utf8": str_orig = "".join(node.itertext()) str_cleaned = self._cleanup_unformatted_text(str_orig)