From 05dc48695039c4f460c4a236efde9ed5c87905a1 Mon Sep 17 00:00:00 2001 From: Balduin Landolt <33053745+BalduinLandolt@users.noreply.github.com> Date: Wed, 18 Oct 2023 11:27:31 +0200 Subject: [PATCH] refactor: remove use of XMLResource in stash models (DEV-2843) (#577) --- .../utils/xmlupload/stash/stash_models.py | 21 +++++++------------ .../xmlupload/stash_circular_references.py | 12 ++++++++--- .../xmlupload/upload_stashed_xml_texts.py | 19 ++++++++--------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/dsp_tools/utils/xmlupload/stash/stash_models.py b/src/dsp_tools/utils/xmlupload/stash/stash_models.py index bc56072b7..adee318b6 100644 --- a/src/dsp_tools/utils/xmlupload/stash/stash_models.py +++ b/src/dsp_tools/utils/xmlupload/stash/stash_models.py @@ -4,13 +4,14 @@ from itertools import groupby from dsp_tools.models.value import KnoraStandoffXml -from dsp_tools.models.xmlresource import XMLResource @dataclass(frozen=True) class StandoffStashItem: """Holds information about a single stashed XML text value.""" + res_id: str + res_type: str uuid: str prop_name: str value: KnoraStandoffXml @@ -22,30 +23,22 @@ class StandoffStash: """Holds information about a number of stashed XML text values, organized by resource instance.""" res_2_stash_items: dict[str, list[StandoffStashItem]] - res_2_xmlres: dict[str, XMLResource] @staticmethod - def make(tups: list[tuple[XMLResource, StandoffStashItem]]) -> StandoffStash | None: + def make(items: list[StandoffStashItem]) -> StandoffStash | None: """ Factory method for StandoffStash. Args: - tups: A list of tuples of XMLResource and StandoffStashItem. + items: A list of StandoffStashItem. Returns: StandoffStash | None: A StandoffStash object or None, if an empty list was passed. """ - if not tups: + if not items: return None - res_2_stash_items = {} - res_2_xmlres = {} - for xmlres, stash_item in tups: - if xmlres.id not in res_2_stash_items: - res_2_stash_items[xmlres.id] = [stash_item] - res_2_xmlres[xmlres.id] = xmlres - else: - res_2_stash_items[xmlres.id].append(stash_item) - return StandoffStash(res_2_stash_items, res_2_xmlres) + grouped_objects = {k: list(vals) for k, vals in groupby(items, key=lambda x: x.res_id)} + return StandoffStash(grouped_objects) @dataclass(frozen=True) diff --git a/src/dsp_tools/utils/xmlupload/stash_circular_references.py b/src/dsp_tools/utils/xmlupload/stash_circular_references.py index 86093dd22..6bef066d0 100644 --- a/src/dsp_tools/utils/xmlupload/stash_circular_references.py +++ b/src/dsp_tools/utils/xmlupload/stash_circular_references.py @@ -26,7 +26,7 @@ def _stash_circular_references( Raises: BaseError """ - stashed_standoff_values: list[tuple[XMLResource, StandoffStashItem]] = [] + stashed_standoff_values: list[StandoffStashItem] = [] stashed_link_values: list[LinkValueStashItem] = [] ok_resources: list[XMLResource] = [] @@ -39,8 +39,14 @@ def _stash_circular_references( # and remove the problematic resrefs from the XMLValue's resrefs list standoff_xml = cast(KnoraStandoffXml, value.value) uuid = str(uuid4()) - standoff_stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml) - stashed_standoff_values.append((res, standoff_stash_item)) + standoff_stash_item = StandoffStashItem( + res_id=res.id, + res_type=res.restype, + uuid=uuid, + prop_name=link_prop.name, + value=standoff_xml, + ) + stashed_standoff_values.append(standoff_stash_item) value.value = KnoraStandoffXml(uuid) value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids] elif link_prop.valtype == "resptr": diff --git a/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py b/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py index 359833ea1..82a111e1f 100644 --- a/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py +++ b/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py @@ -8,7 +8,6 @@ from dsp_tools.models.exceptions import BaseError from dsp_tools.models.resource import KnoraStandoffXmlEncoder from dsp_tools.models.value import KnoraStandoffXml -from dsp_tools.models.xmlresource import XMLResource from dsp_tools.utils.create_logger import get_logger from dsp_tools.utils.shared import try_network_action from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem @@ -17,14 +16,14 @@ def _log_unable_to_retrieve_resource( - resource: XMLResource, + resource: str, received_error: BaseError, ) -> None: """ This function logs the error if it is not possible to retrieve the resource. Args: - resource: the resource + resource: the resource id received_error: the error """ # print the message to keep track of the cause for the failure @@ -32,7 +31,7 @@ def _log_unable_to_retrieve_resource( # this resource will remain in nonapplied_xml_texts, which will be handled by the caller orig_err_msg = received_error.orig_err_msg_from_api or received_error.message err_msg = ( - f"Unable to upload XML texts of resource '{resource.id}', " + f"Unable to upload XML texts of resource '{resource}', " "because the resource cannot be retrieved from the DSP server." ) print(f" WARNING: {err_msg} Original error message: {orig_err_msg}") @@ -118,7 +117,7 @@ def upload_stashed_xml_texts( print("Upload the stashed XML texts...") logger.info("Upload the stashed XML texts...") - not_uploaded: list[tuple[XMLResource, StandoffStashItem]] = [] + not_uploaded: list[StandoffStashItem] = [] for res_id, stash_items in stashed_xml_texts.res_2_stash_items.items(): res_iri = id2iri_mapping.get(res_id) if not res_iri: @@ -126,11 +125,11 @@ def upload_stashed_xml_texts( # no action necessary: this resource will remain in nonapplied_xml_texts, # which will be handled by the caller continue - xmlres: XMLResource = stashed_xml_texts.res_2_xmlres[res_id] + # xmlres: XMLResource = stashed_xml_texts.res_2_xmlres[res_id] try: resource_in_triplestore = try_network_action(con.get, route=f"/v2/resources/{quote_plus(res_iri)}") except BaseError as err: - _log_unable_to_retrieve_resource(resource=xmlres, received_error=err) + _log_unable_to_retrieve_resource(resource=res_id, received_error=err) continue if verbose: print(f' Upload XML text(s) of resource "{res_id}"...') @@ -139,12 +138,12 @@ def upload_stashed_xml_texts( for stash_item in stash_items: value_iri = _get_value_iri(stash_item.prop_name, resource_in_triplestore, stash_item.uuid) if not value_iri: - not_uploaded.append((xmlres, stash_item)) # does that even make sense to hold on to that one? + not_uploaded.append(stash_item) continue success = _upload_stash_item( stash_item=stash_item, res_iri=res_iri, - res_type=xmlres.restype, + res_type=stash_item.res_type, res_id=res_id, value_iri=value_iri, id2iri_mapping=id2iri_mapping, @@ -152,7 +151,7 @@ def upload_stashed_xml_texts( context=context, ) if not success: - not_uploaded.append((xmlres, stash_item)) + not_uploaded.append(stash_item) return StandoffStash.make(not_uploaded)