Skip to content

Commit

Permalink
refactor: create specific objects for stash information (DEV-2787) (#557)
Browse files
Browse the repository at this point in the history
  • Loading branch information
BalduinLandolt committed Oct 12, 2023
1 parent e8fb38b commit b38c2d3
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 323 deletions.
66 changes: 60 additions & 6 deletions src/dsp_tools/utils/xmlupload/stash/stash_models.py
@@ -1,16 +1,15 @@
from __future__ import annotations

from dataclasses import dataclass
from itertools import groupby

from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlresource import XMLResource


@dataclass(frozen=True)
class StandoffStashItem:
"""
Holds information about a single stashed XML text value.
"""
"""Holds information about a single stashed XML text value."""

uuid: str
prop_name: str
Expand All @@ -20,9 +19,7 @@ class StandoffStashItem:

@dataclass(frozen=True)
class StandoffStash:
"""
Holds information about a number of stashed XML text values, organized by resource instance.
"""
"""Holds information about a number of stashed XML text values, organized by resource instance."""

res_2_stash_items: dict[str, list[StandoffStashItem]]
res_2_xmlres: dict[str, XMLResource]
Expand All @@ -49,3 +46,60 @@ def make(tups: list[tuple[XMLResource, StandoffStashItem]]) -> StandoffStash | N
else:
res_2_stash_items[xmlres.id].append(stash_item)
return StandoffStash(res_2_stash_items, res_2_xmlres)


@dataclass(frozen=True)
class LinkValueStashItem:
    """
    Holds information about a single stashed link value.

    Instances are immutable (frozen dataclass), so they compare by value and are hashable.

    Attributes:
        res_id: id of the resource that the stashed link value was taken from
        res_type: type of that resource
        prop_name: name of the link property that held the value
        target_id: id of the resource that the link points to
    """

    res_id: str
    res_type: str
    prop_name: str
    target_id: str


@dataclass(frozen=True)
class LinkValueStash:
    """Holds information about a number of stashed link values (resptr-props), organized by resource instance."""

    # maps a resource id to all stash items whose res_id equals that id
    res_2_stash_items: dict[str, list[LinkValueStashItem]]

    @staticmethod
    def make(items: list[LinkValueStashItem]) -> LinkValueStash | None:
        """
        Factory method for LinkValueStash.

        The items are grouped by their res_id.
        Items that share a res_id end up in the same list, in the order in which they were passed,
        regardless of whether they are adjacent in the input.

        Args:
            items: A list of LinkValueStashItem.

        Returns:
            LinkValueStash | None: A LinkValueStash object or None, if an empty list was passed.
        """
        if not items:
            return None
        # Do not use itertools.groupby() here: it only merges *adjacent* items with equal keys,
        # so for unsorted input a later run with the same res_id would overwrite the earlier one
        # and stash items would silently get lost.
        grouped_objects: dict[str, list[LinkValueStashItem]] = {}
        for item in items:
            grouped_objects.setdefault(item.res_id, []).append(item)
        return LinkValueStash(grouped_objects)


@dataclass(frozen=True)
class Stash:
    """Bundles a standoff stash and a link value stash; either of the two may be None."""

    standoff_stash: StandoffStash | None
    link_value_stash: LinkValueStash | None

    @staticmethod
    def make(standoff_stash: StandoffStash | None, link_value_stash: LinkValueStash | None) -> Stash | None:
        """
        Factory method for Stash.

        Args:
            standoff_stash: A StandoffStash object or None.
            link_value_stash: A LinkValueStash object or None.

        Returns:
            Stash | None: A Stash object wrapping both inputs, or None if neither input is truthy
                (in particular, if both inputs are None).
        """
        # guard clause: nothing was stashed, so there is nothing to wrap
        nothing_stashed = not standoff_stash and not link_value_stash
        if nothing_stashed:
            return None
        return Stash(standoff_stash=standoff_stash, link_value_stash=link_value_stash)
72 changes: 30 additions & 42 deletions src/dsp_tools/utils/xmlupload/stash_circular_references.py
Expand Up @@ -5,31 +5,31 @@

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlproperty import XMLProperty
from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem
from dsp_tools.utils.xmlupload.stash.stash_models import (
LinkValueStash,
LinkValueStashItem,
StandoffStash,
StandoffStashItem,
Stash,
)

logger = get_logger(__name__)


def _stash_circular_references(
nok_resources: list[XMLResource],
ok_res_ids: set[str],
ok_resources: list[XMLResource],
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
) -> tuple[
list[XMLResource],
set[str],
list[XMLResource],
StandoffStash | None,
dict[XMLResource, dict[XMLProperty, list[str]]],
]:
) -> tuple[list[XMLResource], set[str], list[XMLResource], Stash | None]:
"""
Raises:
BaseError
"""
stashed_standoff_values: list[tuple[XMLResource, StandoffStashItem]] = []
stashed_link_values: list[LinkValueStashItem] = []
ok_resources: list[XMLResource] = []

for res in nok_resources.copy():
for link_prop in res.get_props_with_links():
if link_prop.valtype == "text":
Expand All @@ -39,22 +39,21 @@ def _stash_circular_references(
# and remove the problematic resrefs from the XMLValue's resrefs list
standoff_xml = cast(KnoraStandoffXml, value.value)
uuid = str(uuid4())
stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
stashed_standoff_values.append((res, stash_item))
standoff_stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
stashed_standoff_values.append((res, standoff_stash_item))
value.value = KnoraStandoffXml(uuid)
value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids]
elif link_prop.valtype == "resptr":
for value in link_prop.values.copy():
if value.value not in ok_res_ids:
# value.value is the id of the target resource. stash it, then delete it
if res not in stashed_resptr_props:
stashed_resptr_props[res] = {}
stashed_resptr_props[res][link_prop] = [str(value.value)]
else:
if link_prop not in stashed_resptr_props[res]:
stashed_resptr_props[res][link_prop] = [str(value.value)]
else:
stashed_resptr_props[res][link_prop].append(str(value.value))
link_stash_item = LinkValueStashItem(
res_id=res.id,
res_type=res.restype,
prop_name=link_prop.name,
target_id=str(value.value),
)
stashed_link_values.append(link_stash_item)
link_prop.values.remove(value)
else:
logger.error("ERROR in remove_circular_references(): link_prop.valtype is neither text nor resptr.")
Expand All @@ -69,14 +68,16 @@ def _stash_circular_references(
nok_resources.remove(res)

standoff_stash = StandoffStash.make(stashed_standoff_values)
link_value_stash = LinkValueStash.make(stashed_link_values)
stash = Stash.make(standoff_stash, link_value_stash)

return nok_resources, ok_res_ids, ok_resources, standoff_stash, stashed_resptr_props
return nok_resources, ok_res_ids, ok_resources, stash


def remove_circular_references(
resources: list[XMLResource],
verbose: bool,
) -> tuple[list[XMLResource], StandoffStash | None, dict[XMLResource, dict[XMLProperty, list[str]]]]:
) -> tuple[list[XMLResource], Stash | None]:
"""
Temporarily removes problematic resource-references from a list of resources.
A reference is problematic if it creates a circle (circular references).
Expand All @@ -90,17 +91,14 @@ def remove_circular_references(
Returns:
list: list of cleaned resources
stashed_xml_texts: dict with the stashed XML texts
stashed_resptr_props: dict with the stashed resptr-props
stash: an object that contains the problematic references
"""

if verbose:
print("Checking resources for unresolvable references...")
logger.info("Checking resources for unresolvable references...")

stashed_xml_texts: StandoffStash | None = None
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}

stash: Stash | None = None
# sort the resources according to outgoing resptrs
ok_resources: list[XMLResource] = []
# resources with circular references
Expand All @@ -123,23 +121,13 @@ def remove_circular_references(
resources = nok_resources
if len(nok_resources) == nok_len:
# there are circular references. go through all problematic resources, and stash the problematic references.
(
nok_resources,
ok_res_ids,
ok_resources,
stashed_xml_texts,
stashed_resptr_props,
) = _stash_circular_references(
nok_resources=nok_resources,
ok_res_ids=ok_res_ids,
ok_resources=ok_resources,
stashed_resptr_props=stashed_resptr_props,
)
nok_resources, ok_res_ids, ok_res, stash = _stash_circular_references(nok_resources, ok_res_ids)
ok_resources.extend(ok_res)
nok_len = len(nok_resources)
nok_resources = []
cnt += 1
if verbose:
print(f"{cnt}. ordering pass finished.")
logger.info(f"{cnt}. ordering pass finished.")
logger.debug(f"{cnt}. ordering pass finished.")

return ok_resources, stashed_xml_texts, stashed_resptr_props
return ok_resources, stash

0 comments on commit b38c2d3

Please sign in to comment.