Skip to content

Commit

Permalink
chore: tidy up KnoraStandoffXml class (#597)
Browse files: browse the repository at this point in the history
  • Loading branch information
jnussbaum committed Oct 26, 2023
1 parent 72f710c commit c430b67
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 9 deletions.
4 changes: 0 additions & 4 deletions src/dsp_tools/models/value.py
Expand Up @@ -15,7 +15,6 @@
class KnoraStandoffXml:
"""Used to handle XML strings for standoff markup"""

__iriregexp = regex.compile(r"IRI:[^:]*:IRI")
__xmlstr: str

def __init__(self, xmlstr: str) -> None:
Expand All @@ -24,9 +23,6 @@ def __init__(self, xmlstr: str) -> None:
def __str__(self) -> str:
return self.__xmlstr

def get_all_iris(self) -> Optional[list[str]]:
return self.__iriregexp.findall(self.__xmlstr)

def find_ids_referenced_in_salsah_links(self) -> set[str]:
return set(regex.findall(pattern='href="IRI:(.*?):IRI"', string=self.__xmlstr))

Expand Down
7 changes: 3 additions & 4 deletions src/dsp_tools/models/xmlresource.py
Expand Up @@ -146,16 +146,15 @@ def get_propvals(
v = value.value # if we do not find the id, we assume it's a valid DSP IRI
elif prop.valtype == "text":
if isinstance(value.value, KnoraStandoffXml):
iri_refs = value.value.get_all_iris()
for iri_ref in iri_refs or []:
res_id = iri_ref.split(":")[1]
res_ids = value.value.find_ids_referenced_in_salsah_links()
for res_id in res_ids:
iri = resiri_lookup.get(res_id)
if not iri:
raise BaseError(
f"Resource '{self.id}' cannot be created, because it contains a salsah-Link to "
f"the following invalid resource: '{res_id}'"
)
value.value.replace(iri_ref, iri)
value.value.replace(f"IRI:{res_id}:IRI", iri)
v = value.value
else:
v = value.value
Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/models/xmlvalue.py
Expand Up @@ -27,7 +27,7 @@ def __init__(
xmlstr_orig = etree.tostring(node, encoding="unicode", method="xml")
xmlstr_cleaned = self._cleanup_formatted_text(xmlstr_orig)
self.value = KnoraStandoffXml(xmlstr_cleaned)
self.resrefs = list({x.split(":")[1] for x in self.value.get_all_iris() or []})
self.resrefs = list(self.value.find_ids_referenced_in_salsah_links())
elif val_type == "text" and node.get("encoding") == "utf8":
str_orig = "".join(node.itertext())
str_cleaned = self._cleanup_unformatted_text(str_orig)
Expand Down

0 comments on commit c430b67

Please sign in to comment.