Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: create specific objects for stash information (DEV-2787) #557

Merged
merged 30 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f76d56f
create link stash object
BalduinLandolt Oct 6, 2023
ca7bfc1
create object for entire stash of a resource
BalduinLandolt Oct 6, 2023
e8c5e72
extract method
BalduinLandolt Oct 6, 2023
a8a9f3a
docstrings
BalduinLandolt Oct 6, 2023
2a67c02
Merge branch 'main' into feature/dev-2787-draft-stash-datastructure
BalduinLandolt Oct 10, 2023
c15fbd5
Merge branch 'main' into feature/dev-2787-draft-stash-datastructure
BalduinLandolt Oct 10, 2023
7315f38
introduce stash object
BalduinLandolt Oct 10, 2023
1bb76f4
improve stash
BalduinLandolt Oct 10, 2023
04e3785
change wiring
BalduinLandolt Oct 10, 2023
bab297e
integrate newly added stash concepts
BalduinLandolt Oct 11, 2023
d3e4619
Merge branch 'main' into feature/dev-2787-draft-stash-datastructure
BalduinLandolt Oct 11, 2023
0c49c5d
cleanup
BalduinLandolt Oct 11, 2023
d478a2a
cleanup
BalduinLandolt Oct 11, 2023
8450ad3
print infos
BalduinLandolt Oct 11, 2023
1cf6900
more prints
BalduinLandolt Oct 11, 2023
15d21b7
more prints
BalduinLandolt Oct 11, 2023
fc65af9
fix?
BalduinLandolt Oct 11, 2023
b0184a7
cleanup
BalduinLandolt Oct 11, 2023
17b6aa2
more print
BalduinLandolt Oct 11, 2023
8e3bacc
Update shared.py
BalduinLandolt Oct 11, 2023
3e47c72
Update shared.py
BalduinLandolt Oct 11, 2023
9dc5022
add quicker runner
BalduinLandolt Oct 11, 2023
a19cbf0
remove prints
BalduinLandolt Oct 11, 2023
ea240ee
fix?
BalduinLandolt Oct 11, 2023
05c9b95
cleanup
BalduinLandolt Oct 11, 2023
33710c2
Update xmlupload.py
BalduinLandolt Oct 11, 2023
2f8f8ef
cleanup
BalduinLandolt Oct 11, 2023
498bf5e
apply suggestions
BalduinLandolt Oct 12, 2023
2415d55
Merge branch 'main' into feature/dev-2787-draft-stash-datastructure
BalduinLandolt Oct 12, 2023
2e9e6af
Merge branch 'main' into feature/dev-2787-draft-stash-datastructure
BalduinLandolt Oct 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 60 additions & 6 deletions src/dsp_tools/utils/xmlupload/stash/stash_models.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
from __future__ import annotations

from dataclasses import dataclass
from itertools import groupby

from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlresource import XMLResource


@dataclass(frozen=True)
class StandoffStashItem:
"""
Holds information about a single stashed XML text value.
"""
"""Holds information about a single stashed XML text value."""

uuid: str
prop_name: str
Expand All @@ -20,9 +19,7 @@ class StandoffStashItem:

@dataclass(frozen=True)
class StandoffStash:
"""
Holds information about a number of stashed XML text values, organized by resource instance.
"""
"""Holds information about a number of stashed XML text values, organized by resource instance."""

res_2_stash_items: dict[str, list[StandoffStashItem]]
res_2_xmlres: dict[str, XMLResource]
Expand All @@ -49,3 +46,60 @@ def make(tups: list[tuple[XMLResource, StandoffStashItem]]) -> StandoffStash | N
else:
res_2_stash_items[xmlres.id].append(stash_item)
return StandoffStash(res_2_stash_items, res_2_xmlres)


@dataclass(frozen=True)
class LinkValueStashItem:
    """
    Immutable record of one link value (resptr) that was taken out of a resource and stashed.

    Attributes:
        res_id: ID of the resource that the link value was removed from.
        res_type: Resource type of the resource that the link value was removed from.
        prop_name: Name of the link property that held the value.
        target_id: ID of the resource that the link pointed to.
    """

    res_id: str
    res_type: str
    prop_name: str
    target_id: str
BalduinLandolt marked this conversation as resolved.
Show resolved Hide resolved


@dataclass(frozen=True)
class LinkValueStash:
    """Holds information about a number of stashed link values (resptr-props), organized by resource instance."""

    # maps the ID of a resource to the link values that were stashed for this resource
    res_2_stash_items: dict[str, list[LinkValueStashItem]]

    @staticmethod
    def make(items: list[LinkValueStashItem]) -> LinkValueStash | None:
        """
        Factory method for LinkValueStash.

        The items are grouped by their res_id.
        Items belonging to the same resource don't have to be adjacent in the input list:
        all of them end up in the same group, in the order in which they appear in the input.

        Args:
            items: A list of LinkValueStashItem.

        Returns:
            LinkValueStash | None: A LinkValueStash object or None, if an empty list was passed.
        """
        if not items:
            return None
        # Don't use itertools.groupby() on the raw list: it only merges *consecutive* items with the same key,
        # so for unsorted input, a later run of the same res_id would overwrite the earlier one in the dict.
        grouped_objects: dict[str, list[LinkValueStashItem]] = {}
        for item in items:
            grouped_objects.setdefault(item.res_id, []).append(item)
        return LinkValueStash(grouped_objects)


@dataclass(frozen=True)
class Stash:
    """Bundles the two kinds of stashes: the standoff stash and the link value stash."""

    standoff_stash: StandoffStash | None
    link_value_stash: LinkValueStash | None

    @staticmethod
    def make(standoff_stash: StandoffStash | None, link_value_stash: LinkValueStash | None) -> Stash | None:
        """
        Create a Stash out of its two parts, provided that there is something to hold.

        Args:
            standoff_stash: The stashed standoff (XML text) values, or None.
            link_value_stash: The stashed link values, or None.

        Returns:
            Stash | None: A Stash wrapping both inputs, or None if both inputs are None.
        """
        nothing_stashed = not standoff_stash and not link_value_stash
        if nothing_stashed:
            return None
        return Stash(standoff_stash=standoff_stash, link_value_stash=link_value_stash)
72 changes: 30 additions & 42 deletions src/dsp_tools/utils/xmlupload/stash_circular_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,31 @@

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlproperty import XMLProperty
from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem
from dsp_tools.utils.xmlupload.stash.stash_models import (
LinkValueStash,
LinkValueStashItem,
StandoffStash,
StandoffStashItem,
Stash,
)

logger = get_logger(__name__)


def _stash_circular_references(
nok_resources: list[XMLResource],
ok_res_ids: set[str],
ok_resources: list[XMLResource],
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
) -> tuple[
list[XMLResource],
set[str],
list[XMLResource],
StandoffStash | None,
dict[XMLResource, dict[XMLProperty, list[str]]],
]:
) -> tuple[list[XMLResource], set[str], list[XMLResource], Stash | None]:
"""
Raises:
BaseError
"""
stashed_standoff_values: list[tuple[XMLResource, StandoffStashItem]] = []
stashed_link_values: list[LinkValueStashItem] = []
ok_resources: list[XMLResource] = []

for res in nok_resources.copy():
for link_prop in res.get_props_with_links():
if link_prop.valtype == "text":
Expand All @@ -39,22 +39,21 @@ def _stash_circular_references(
# and remove the problematic resrefs from the XMLValue's resrefs list
standoff_xml = cast(KnoraStandoffXml, value.value)
uuid = str(uuid4())
stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
stashed_standoff_values.append((res, stash_item))
standoff_stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
stashed_standoff_values.append((res, standoff_stash_item))
value.value = KnoraStandoffXml(uuid)
value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids]
elif link_prop.valtype == "resptr":
for value in link_prop.values.copy():
if value.value not in ok_res_ids:
# value.value is the id of the target resource. stash it, then delete it
if res not in stashed_resptr_props:
stashed_resptr_props[res] = {}
stashed_resptr_props[res][link_prop] = [str(value.value)]
else:
if link_prop not in stashed_resptr_props[res]:
stashed_resptr_props[res][link_prop] = [str(value.value)]
else:
stashed_resptr_props[res][link_prop].append(str(value.value))
link_stash_item = LinkValueStashItem(
res_id=res.id,
res_type=res.restype,
prop_name=link_prop.name,
target_id=str(value.value),
)
stashed_link_values.append(link_stash_item)
link_prop.values.remove(value)
else:
logger.error("ERROR in remove_circular_references(): link_prop.valtype is neither text nor resptr.")
Expand All @@ -69,14 +68,16 @@ def _stash_circular_references(
nok_resources.remove(res)

standoff_stash = StandoffStash.make(stashed_standoff_values)
link_value_stash = LinkValueStash.make(stashed_link_values)
stash = Stash.make(standoff_stash, link_value_stash)

return nok_resources, ok_res_ids, ok_resources, standoff_stash, stashed_resptr_props
return nok_resources, ok_res_ids, ok_resources, stash


def remove_circular_references(
resources: list[XMLResource],
verbose: bool,
) -> tuple[list[XMLResource], StandoffStash | None, dict[XMLResource, dict[XMLProperty, list[str]]]]:
) -> tuple[list[XMLResource], Stash | None]:
"""
Temporarily removes problematic resource-references from a list of resources.
A reference is problematic if it creates a circle (circular references).
Expand All @@ -90,17 +91,14 @@ def remove_circular_references(

Returns:
list: list of cleaned resources
stashed_xml_texts: dict with the stashed XML texts
stashed_resptr_props: dict with the stashed resptr-props
stash: an object that contains the problematic references
"""

if verbose:
print("Checking resources for unresolvable references...")
logger.info("Checking resources for unresolvable references...")

stashed_xml_texts: StandoffStash | None = None
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}

stash: Stash | None = None
# sort the resources according to outgoing resptrs
ok_resources: list[XMLResource] = []
# resources with circular references
Expand All @@ -123,23 +121,13 @@ def remove_circular_references(
resources = nok_resources
if len(nok_resources) == nok_len:
# there are circular references. go through all problematic resources, and stash the problematic references.
(
nok_resources,
ok_res_ids,
ok_resources,
stashed_xml_texts,
stashed_resptr_props,
) = _stash_circular_references(
nok_resources=nok_resources,
ok_res_ids=ok_res_ids,
ok_resources=ok_resources,
stashed_resptr_props=stashed_resptr_props,
)
nok_resources, ok_res_ids, ok_res, stash = _stash_circular_references(nok_resources, ok_res_ids)
ok_resources.extend(ok_res)
nok_len = len(nok_resources)
nok_resources = []
cnt += 1
if verbose:
print(f"{cnt}. ordering pass finished.")
logger.info(f"{cnt}. ordering pass finished.")
logger.debug(f"{cnt}. ordering pass finished.")

return ok_resources, stashed_xml_texts, stashed_resptr_props
return ok_resources, stash