Skip to content

Commit

Permalink
refactor: identify temporary text values with UUID instead of text hash (DEV-2790) (#558)
Browse files Browse the repository at this point in the history
  • Loading branch information
BalduinLandolt committed Oct 9, 2023
1 parent 1e2c8f2 commit 08624b2
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 347 deletions.
16 changes: 10 additions & 6 deletions src/dsp_tools/models/value.py
@@ -1,4 +1,5 @@
# pylint: disable=missing-class-docstring,missing-function-docstring
from __future__ import annotations

from typing import Any, Optional, Union

Expand Down Expand Up @@ -32,12 +33,15 @@ def find_ids_referenced_in_salsah_links(self) -> set[str]:
def replace(self, fromStr: str, toStr: str) -> None:
    """
    Replace every occurrence of ``fromStr`` in the stored XML string with ``toStr``.

    The substitution is a literal (non-regex) one, and the result overwrites the stored string.

    Args:
        fromStr: the text to look for
        toStr: the text to put in its place
    """
    current_xml = self.__xmlstr
    self.__xmlstr = current_xml.replace(fromStr, toStr)

def replace_one_id_with_iri_in_salsah_link(self, internal_id: str, iri: str) -> None:
    """
    Replace one internal ID with its IRI in the links of the stored XML string.

    Every occurrence of ``href="IRI:<internal_id>:IRI"`` is rewritten in place to ``href="<iri>"``.

    The replacement uses plain string matching instead of a regular expression:
    characters that may appear in an ID but are special in a pattern (e.g. ``.``) only match themselves,
    so links to other, similarly named IDs are left alone,
    and ``iri`` is inserted verbatim instead of being interpreted as a replacement template.

    Args:
        internal_id: the ID that is referenced between ``IRI:`` and ``:IRI``
        iri: the IRI to put in place of the reference
    """
    placeholder = f'href="IRI:{internal_id}:IRI"'
    self.__xmlstr = self.__xmlstr.replace(placeholder, f'href="{iri}"')
def with_iris(self, id_2_iri: dict[str, str]) -> KnoraStandoffXml:
    """
    Build a new KnoraStandoffXml in which the internal IDs referenced in links are replaced by IRIs.

    The stored XML string of this object is not reassigned; the replacements are done on a local copy.

    Args:
        id_2_iri: mapping from internal ID to IRI; it is looked up for every ID
            that find_ids_referenced_in_salsah_links() returns

    Raises:
        KeyError: if one of the referenced IDs is not a key of id_2_iri

    Returns:
        a new KnoraStandoffXml holding the XML string with the IRIs filled in
    """
    resolved_xml = self.__xmlstr
    referenced_ids = self.find_ids_referenced_in_salsah_links()
    for referenced_id in referenced_ids:
        placeholder = f'href="IRI:{referenced_id}:IRI"'
        resolved_xml = resolved_xml.replace(placeholder, f'href="{id_2_iri[referenced_id]}"')
    return KnoraStandoffXml(resolved_xml)


class Value:
Expand Down
Empty file.
51 changes: 51 additions & 0 deletions src/dsp_tools/utils/xmlupload/stash/stash_models.py
@@ -0,0 +1,51 @@
from __future__ import annotations

from dataclasses import dataclass

from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlresource import XMLResource


@dataclass(frozen=True)
class StandoffStashItem:
    """
    One stashed XML text value.

    Instances are immutable (frozen dataclass).
    """

    # identifier under which this value was stashed
    uuid: str
    # name of the property the value belongs to
    prop_name: str
    # the stashed XML text itself
    value: KnoraStandoffXml
    # Permissions missing still


@dataclass(frozen=True)
class StandoffStash:
    """
    A collection of stashed XML text values, grouped by the ID of the resource they belong to.
    """

    # resource ID -> stash items of that resource, in the order they were passed to make()
    res_2_stash_items: dict[str, list[StandoffStashItem]]
    # resource ID -> the first XMLResource object that was seen with this ID
    res_2_xmlres: dict[str, XMLResource]

    @staticmethod
    def make(tups: list[tuple[XMLResource, StandoffStashItem]]) -> StandoffStash | None:
        """
        Factory method for StandoffStash: groups the stash items by the ID of their resource.

        Args:
            tups: A list of tuples of XMLResource and StandoffStashItem.

        Returns:
            StandoffStash | None: A StandoffStash object or None, if an empty list was passed.
        """
        if not tups:
            return None
        items_by_res_id: dict[str, list[StandoffStashItem]] = {}
        xmlres_by_res_id: dict[str, XMLResource] = {}
        for resource, item in tups:
            res_id = resource.id
            # only the first resource object per ID is remembered
            xmlres_by_res_id.setdefault(res_id, resource)
            items_by_res_id.setdefault(res_id, []).append(item)
        return StandoffStash(items_by_res_id, xmlres_by_res_id)
38 changes: 17 additions & 21 deletions src/dsp_tools/utils/xmlupload/stash_circular_references.py
@@ -1,13 +1,14 @@
from __future__ import annotations

from datetime import datetime
from typing import cast
from uuid import uuid4

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlproperty import XMLProperty
from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem

logger = get_logger(__name__)

Expand All @@ -16,34 +17,31 @@ def _stash_circular_references(
nok_resources: list[XMLResource],
ok_res_ids: set[str],
ok_resources: list[XMLResource],
stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
) -> tuple[
list[XMLResource],
set[str],
list[XMLResource],
dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
StandoffStash | None,
dict[XMLResource, dict[XMLProperty, list[str]]],
]:
"""
Raises:
BaseError
"""
stashed_standoff_values: list[tuple[XMLResource, StandoffStashItem]] = []
for res in nok_resources.copy():
for link_prop in res.get_props_with_links():
if link_prop.valtype == "text":
for value in link_prop.values:
if value.resrefs and not all(_id in ok_res_ids for _id in value.resrefs):
# stash this XML text, replace it by its hash, and remove the
# problematic resrefs from the XMLValue's resrefs list
value_hash = str(hash(f"{value.value}{datetime.now()}"))
if res not in stashed_xml_texts:
stashed_xml_texts[res] = {link_prop: {value_hash: cast(KnoraStandoffXml, value.value)}}
elif link_prop not in stashed_xml_texts[res]:
stashed_xml_texts[res][link_prop] = {value_hash: cast(KnoraStandoffXml, value.value)}
else:
stashed_xml_texts[res][link_prop][value_hash] = cast(KnoraStandoffXml, value.value)
value.value = KnoraStandoffXml(value_hash)
# replace the problematic XML with a UUID
# and remove the problematic resrefs from the XMLValue's resrefs list
standoff_xml = cast(KnoraStandoffXml, value.value)
uuid = str(uuid4())
stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
stashed_standoff_values.append((res, stash_item))
value.value = KnoraStandoffXml(uuid)
value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids]
elif link_prop.valtype == "resptr":
for value in link_prop.values.copy():
Expand All @@ -70,17 +68,15 @@ def _stash_circular_references(
ok_res_ids.add(res.id)
nok_resources.remove(res)

return nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props
standoff_stash = StandoffStash.make(stashed_standoff_values)

return nok_resources, ok_res_ids, ok_resources, standoff_stash, stashed_resptr_props


def remove_circular_references(
resources: list[XMLResource],
verbose: bool,
) -> tuple[
list[XMLResource],
dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
dict[XMLResource, dict[XMLProperty, list[str]]],
]:
) -> tuple[list[XMLResource], StandoffStash | None, dict[XMLResource, dict[XMLProperty, list[str]]],]:
"""
Temporarily removes problematic resource-references from a list of resources.
A reference is problematic if it creates a circle (circular references).
Expand All @@ -102,7 +98,7 @@ def remove_circular_references(
print("Checking resources for unresolvable references...")
logger.info("Checking resources for unresolvable references...")

stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {}
stashed_xml_texts: StandoffStash | None = None
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}

# sort the resources according to outgoing resptrs
Expand Down Expand Up @@ -137,7 +133,6 @@ def remove_circular_references(
nok_resources=nok_resources,
ok_res_ids=ok_res_ids,
ok_resources=ok_resources,
stashed_xml_texts=stashed_xml_texts,
stashed_resptr_props=stashed_resptr_props,
)
nok_len = len(nok_resources)
Expand All @@ -146,4 +141,5 @@ def remove_circular_references(
if verbose:
print(f"{cnt}. ordering pass finished.")
logger.info(f"{cnt}. ordering pass finished.")

return ok_resources, stashed_xml_texts, stashed_resptr_props

0 comments on commit 08624b2

Please sign in to comment.