From 08624b2bdf0acd4f1cd2f126e2f5511f4b633d15 Mon Sep 17 00:00:00 2001
From: Balduin Landolt <33053745+BalduinLandolt@users.noreply.github.com>
Date: Mon, 9 Oct 2023 15:17:39 +0200
Subject: [PATCH] refactor: identify temporary text values with UUID instead of
text hash (DEV-2790) (#558)
---
src/dsp_tools/models/value.py | 16 +-
.../utils/xmlupload/stash/__init__.py | 0
.../utils/xmlupload/stash/stash_models.py | 51 +++
.../xmlupload/stash_circular_references.py | 38 +--
.../xmlupload/upload_stashed_xml_texts.py | 317 ++++++------------
src/dsp_tools/utils/xmlupload/xmlupload.py | 21 +-
.../test_upload_stashed_xml_texts.py | 9 +-
.../test_xmlupload/test_xmlupload.py | 87 -----
8 files changed, 192 insertions(+), 347 deletions(-)
create mode 100644 src/dsp_tools/utils/xmlupload/stash/__init__.py
create mode 100644 src/dsp_tools/utils/xmlupload/stash/stash_models.py
diff --git a/src/dsp_tools/models/value.py b/src/dsp_tools/models/value.py
index a494e5610..96fe1acfa 100644
--- a/src/dsp_tools/models/value.py
+++ b/src/dsp_tools/models/value.py
@@ -1,4 +1,5 @@
# pylint: disable=missing-class-docstring,missing-function-docstring
+from __future__ import annotations
from typing import Any, Optional, Union
@@ -32,12 +33,15 @@ def find_ids_referenced_in_salsah_links(self) -> set[str]:
def replace(self, fromStr: str, toStr: str) -> None:
self.__xmlstr = self.__xmlstr.replace(fromStr, toStr)
- def replace_one_id_with_iri_in_salsah_link(self, internal_id: str, iri: str) -> None:
- self.__xmlstr = regex.sub(
- pattern=f'href="IRI:{internal_id}:IRI"',
- repl=f'href="{iri}"',
- string=self.__xmlstr,
- )
+    def with_iris(self, id_2_iri: dict[str, str]) -> KnoraStandoffXml:
+        """
+        Build a new KnoraStandoffXml in which every salsah-link that points to an internal id
+        points to the IRI that `id_2_iri` maps this id to. This object itself is left unchanged.
+        """
+        xml = self.__xmlstr
+        for res_id in self.find_ids_referenced_in_salsah_links():
+            xml = xml.replace(f'href="IRI:{res_id}:IRI"', f'href="{id_2_iri[res_id]}"')
+        return KnoraStandoffXml(xml)
class Value:
diff --git a/src/dsp_tools/utils/xmlupload/stash/__init__.py b/src/dsp_tools/utils/xmlupload/stash/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/dsp_tools/utils/xmlupload/stash/stash_models.py b/src/dsp_tools/utils/xmlupload/stash/stash_models.py
new file mode 100644
index 000000000..c0d070163
--- /dev/null
+++ b/src/dsp_tools/utils/xmlupload/stash/stash_models.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from dsp_tools.models.value import KnoraStandoffXml
+from dsp_tools.models.xmlresource import XMLResource
+
+
+@dataclass(frozen=True)
+class StandoffStashItem:
+ """
+ Holds information about a single stashed XML text value.
+ """
+
+ uuid: str  # UUID that stands in for the stashed text in the resource and identifies it later on
+ prop_name: str  # name of the property the stashed value belongs to
+ value: KnoraStandoffXml  # the stashed standoff XML text
+ # TODO: the permissions of the value are not part of the stash item yet
+
+
+@dataclass(frozen=True)
+class StandoffStash:
+    """
+    Collection of stashed XML text values, grouped by the id of the resource they belong to.
+    """
+
+    res_2_stash_items: dict[str, list[StandoffStashItem]]  # resource id -> stash items of that resource
+    res_2_xmlres: dict[str, XMLResource]  # resource id -> the resource with that id
+
+    @staticmethod
+    def make(tups: list[tuple[XMLResource, StandoffStashItem]]) -> StandoffStash | None:
+        """
+        Factory method: groups (resource, stash item) pairs by resource id.
+
+        Args:
+            tups: pairs of a resource and one of its stashed text values
+
+        Returns:
+            a StandoffStash holding all pairs, or None if `tups` is empty
+        """
+        if not tups:
+            return None
+        items_by_res_id: dict[str, list[StandoffStashItem]] = {}
+        res_by_id: dict[str, XMLResource] = {}
+        for res, item in tups:
+            res_id = res.id
+            # if several resource objects share an id, the first one encountered is kept
+            res_by_id.setdefault(res_id, res)
+            # the items of a resource keep the order in which they appear in `tups`
+            items_by_res_id.setdefault(res_id, []).append(item)
+        return StandoffStash(items_by_res_id, res_by_id)
diff --git a/src/dsp_tools/utils/xmlupload/stash_circular_references.py b/src/dsp_tools/utils/xmlupload/stash_circular_references.py
index ff5aff6fd..5bfd6b1ac 100644
--- a/src/dsp_tools/utils/xmlupload/stash_circular_references.py
+++ b/src/dsp_tools/utils/xmlupload/stash_circular_references.py
@@ -1,13 +1,14 @@
from __future__ import annotations
-from datetime import datetime
from typing import cast
+from uuid import uuid4
from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlproperty import XMLProperty
from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.create_logger import get_logger
+from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem
logger = get_logger(__name__)
@@ -16,34 +17,31 @@ def _stash_circular_references(
nok_resources: list[XMLResource],
ok_res_ids: set[str],
ok_resources: list[XMLResource],
- stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
) -> tuple[
list[XMLResource],
set[str],
list[XMLResource],
- dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ StandoffStash | None,
dict[XMLResource, dict[XMLProperty, list[str]]],
]:
"""
Raises:
BaseError
"""
+ stashed_standoff_values: list[tuple[XMLResource, StandoffStashItem]] = []
for res in nok_resources.copy():
for link_prop in res.get_props_with_links():
if link_prop.valtype == "text":
for value in link_prop.values:
if value.resrefs and not all(_id in ok_res_ids for _id in value.resrefs):
- # stash this XML text, replace it by its hash, and remove the
- # problematic resrefs from the XMLValue's resrefs list
- value_hash = str(hash(f"{value.value}{datetime.now()}"))
- if res not in stashed_xml_texts:
- stashed_xml_texts[res] = {link_prop: {value_hash: cast(KnoraStandoffXml, value.value)}}
- elif link_prop not in stashed_xml_texts[res]:
- stashed_xml_texts[res][link_prop] = {value_hash: cast(KnoraStandoffXml, value.value)}
- else:
- stashed_xml_texts[res][link_prop][value_hash] = cast(KnoraStandoffXml, value.value)
- value.value = KnoraStandoffXml(value_hash)
+ # replace the problematic XML with a UUID
+ # and remove the problematic resrefs from the XMLValue's resrefs list
+ standoff_xml = cast(KnoraStandoffXml, value.value)
+ uuid = str(uuid4())
+ stash_item = StandoffStashItem(uuid=uuid, prop_name=link_prop.name, value=standoff_xml)
+ stashed_standoff_values.append((res, stash_item))
+ value.value = KnoraStandoffXml(uuid)
value.resrefs = [_id for _id in value.resrefs if _id in ok_res_ids]
elif link_prop.valtype == "resptr":
for value in link_prop.values.copy():
@@ -70,17 +68,15 @@ def _stash_circular_references(
ok_res_ids.add(res.id)
nok_resources.remove(res)
- return nok_resources, ok_res_ids, ok_resources, stashed_xml_texts, stashed_resptr_props
+ standoff_stash = StandoffStash.make(stashed_standoff_values)
+
+ return nok_resources, ok_res_ids, ok_resources, standoff_stash, stashed_resptr_props
def remove_circular_references(
resources: list[XMLResource],
verbose: bool,
-) -> tuple[
- list[XMLResource],
- dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
- dict[XMLResource, dict[XMLProperty, list[str]]],
-]:
+) -> tuple[list[XMLResource], StandoffStash | None, dict[XMLResource, dict[XMLProperty, list[str]]],]:
"""
Temporarily removes problematic resource-references from a list of resources.
A reference is problematic if it creates a circle (circular references).
@@ -102,7 +98,7 @@ def remove_circular_references(
print("Checking resources for unresolvable references...")
logger.info("Checking resources for unresolvable references...")
- stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]] = {}
+ stashed_xml_texts: StandoffStash | None = None
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]] = {}
# sort the resources according to outgoing resptrs
@@ -137,7 +133,6 @@ def remove_circular_references(
nok_resources=nok_resources,
ok_res_ids=ok_res_ids,
ok_resources=ok_resources,
- stashed_xml_texts=stashed_xml_texts,
stashed_resptr_props=stashed_resptr_props,
)
nok_len = len(nok_resources)
@@ -146,4 +141,5 @@ def remove_circular_references(
if verbose:
print(f"{cnt}. ordering pass finished.")
logger.info(f"{cnt}. ordering pass finished.")
+
return ok_resources, stashed_xml_texts, stashed_resptr_props
diff --git a/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py b/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py
index 96ddd0b5d..933850d24 100644
--- a/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py
+++ b/src/dsp_tools/utils/xmlupload/upload_stashed_xml_texts.py
@@ -4,8 +4,6 @@
from typing import Any
from urllib.parse import quote_plus
-import regex
-
from dsp_tools.models.connection import Connection
from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.resource import KnoraStandoffXmlEncoder
@@ -14,11 +12,12 @@
from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.shared import try_network_action
+from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash, StandoffStashItem
logger = get_logger(__name__)
-def log_unable_to_retrieve_resource(
+def _log_unable_to_retrieve_resource(
resource: XMLResource,
received_error: BaseError,
) -> None:
@@ -43,238 +42,68 @@ def log_unable_to_retrieve_resource(
def _log_unable_to_upload_xml_resource(
received_error: BaseError,
- stashed_resource: XMLResource,
- all_link_props: XMLProperty,
+ stashed_resource_id: str,
+ prop_name: str,
) -> None:
"""
This function logs if it is not possible to upload a xml resource.
Args:
received_error: Error received
- stashed_resource: resource that is stashed
- all_link_props: all the link properties from that resource
+ stashed_resource_id: id of the resource
+ prop_name: name of the property
"""
# print the message to keep track of the cause for the failure
# apart from that; no action is necessary:
# this resource will remain in nonapplied_xml_texts, which will be handled by the caller
orig_err_msg = received_error.orig_err_msg_from_api or received_error.message
- err_msg = f"Unable to upload the xml text of '{all_link_props.name}' of resource '{stashed_resource.id}'."
+ err_msg = f"Unable to upload the xml text of '{prop_name}' of resource '{stashed_resource_id}'."
print(f" WARNING: {err_msg} Original error message: {orig_err_msg}")
logger.warning(err_msg, exc_info=True)
-def _get_text_hash_value(old_xmltext: str) -> str:
- """
- This function extracts the hash values in the text
-
- Args:
- old_xmltext: Text with hash values.
-
- Returns:
- hash values
- """
- return regex.sub(r"(<\?xml.+>\s*)?\s*(.+)\s*<\/text>", r"\2", old_xmltext)
-
-
-def _replace_internal_ids_with_iris(
- id2iri_mapping: dict[str, str],
- xml_with_id: KnoraStandoffXml,
- id_set: set[str],
-) -> KnoraStandoffXml:
- """
- This function takes an XML string and a set with internal ids that are referenced in salsah-links in that string.
- It replaces all internal ids of that set with the corresponding iri according to the mapping dictionary.
-
- Args:
- id2iri_mapping: dictionary with id to iri mapping
- xml_with_id: KnoraStandoffXml with the string that should have replacements
- id_set: set of ids that are in the string
-
- Returns:
- the xml value with the old ids replaced
- """
- for internal_id in id_set:
- xml_with_id.replace_one_id_with_iri_in_salsah_link(
- internal_id=internal_id,
- iri=id2iri_mapping[internal_id],
- )
- return xml_with_id
-
-
def _create_XMLResource_json_object_to_update(
res_iri: str,
- resource_in_triplestore: dict[str, Any],
- stashed_resource: XMLResource,
- link_prop_in_triplestore: dict[str, Any],
- new_xmltext: KnoraStandoffXml,
+ res_type: str,
link_prop_name: str,
+ value_iri: str,
+ new_xmltext: KnoraStandoffXml,
+ context: dict[str, str],
) -> str:
"""
This function creates a JSON object that can be sent as update request to DSP-API.
Args:
res_iri: the iri of the resource
- resource_in_triplestore: the resource existing in the triplestore
- stashed_resource: the same resource from the stash
- link_prop_in_triplestore: the link property in the triplestore
- new_xmltext: The KnoraStandOffXml with replaced ids
+ res_type: the type of the resource
link_prop_name: the name of the link property
+ value_iri: the iri of the value
+ new_xmltext: the new xml text to be uploaded
+ context: the JSON-LD context of the resource
Returns:
json string
"""
jsonobj = {
"@id": res_iri,
- "@type": stashed_resource.restype,
+ "@type": res_type,
link_prop_name: {
- "@id": link_prop_in_triplestore["@id"],
+ "@id": value_iri,
"@type": "knora-api:TextValue",
"knora-api:textValueAsXml": new_xmltext,
"knora-api:textValueHasMapping": {"@id": "http://rdfh.ch/standoff/mappings/StandardMapping"},
},
- "@context": resource_in_triplestore["@context"],
+ "@context": context,
}
return json.dumps(jsonobj, indent=4, separators=(",", ": "), cls=KnoraStandoffXmlEncoder)
-def _upload_single_link_xml_property(
- link_prop_in_triplestore: dict[str, Any],
- res_iri: str,
- stashed_resource: XMLResource,
- resource_in_triplestore: dict[str, Any],
- link_prop: XMLProperty,
- hash_to_value: dict[str, KnoraStandoffXml],
- id2iri_mapping: dict[str, str],
- nonapplied_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
- verbose: bool,
- con: Connection,
-) -> dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]:
- """
- This function uploads a single xml link property, which was previously stashed.
-
- Args:
- link_prop_in_triplestore: the link property from the triplestore
- res_iri: the iri of the resource
- stashed_resource: the stashed resource
- resource_in_triplestore: the resource retrieved from the triplestore
- link_prop: the name of the link property
- hash_to_value: the hash value of the xml text
- id2iri_mapping: the dictionary with the internal ids and the new IRIs
- nonapplied_xml_texts: the dictionary with the stashes
- verbose: what is printed out
- con: the connection to the triplestore
-
- Returns:
- The stash dictionary with the newly uploaded resource removed.
- If the upload was not sucessfull, it returns the dictionary as it was before.
- """
- xmltext_in_triplestore = link_prop_in_triplestore.get("knora-api:textValueAsXml")
- if not xmltext_in_triplestore:
- # no action necessary: this property will remain in nonapplied_xml_texts,
- # which will be handled by the caller
- return nonapplied_xml_texts
-
- # strip all xml tags from the old xmltext, so that the pure text itself remains
- text_hash_value = _get_text_hash_value(xmltext_in_triplestore)
-
- # if the pure text is a hash, the replacement must be made
- # this hash originates from _stash_circular_references(), and identifies the XML texts
- try:
- xml_from_stash = hash_to_value[text_hash_value]
- except KeyError:
- # no action necessary: this property will remain in nonapplied_xml_texts,
- # which will be handled by the caller
- return nonapplied_xml_texts
-
- id_set = xml_from_stash.find_ids_referenced_in_salsah_links()
-
- xml_from_stash = _replace_internal_ids_with_iris(
- id2iri_mapping=id2iri_mapping,
- xml_with_id=xml_from_stash,
- id_set=id_set,
- )
-
- # prepare API call
- jsondata = _create_XMLResource_json_object_to_update(
- res_iri=res_iri,
- resource_in_triplestore=resource_in_triplestore,
- stashed_resource=stashed_resource,
- link_prop_in_triplestore=link_prop_in_triplestore,
- new_xmltext=xml_from_stash,
- link_prop_name=link_prop.name,
- )
-
- # execute API call
- try:
- try_network_action(con.put, route="/v2/values", jsondata=jsondata)
- except BaseError as err:
- _log_unable_to_upload_xml_resource(
- received_error=err, stashed_resource=stashed_resource, all_link_props=link_prop
- )
- return nonapplied_xml_texts
- if verbose:
- print(f' Successfully uploaded xml text of "{link_prop.name}"\n')
- logger.info(f' Successfully uploaded xml text of "{link_prop.name}"\n')
- nonapplied_xml_texts[stashed_resource][link_prop].pop(text_hash_value)
- return nonapplied_xml_texts
-
-
-def _upload_all_xml_texts_of_single_resource(
- res_iri: str,
- stashed_resource: XMLResource,
- resource_in_triplestore: dict[str, Any],
- link_prop: XMLProperty,
- hash_to_value: dict[str, KnoraStandoffXml],
- id2iri_mapping: dict[str, str],
- nonapplied_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
- verbose: bool,
- con: Connection,
-) -> dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]:
- """
- This function takes one resource and extracts all the link properties of that resource.
- It sends all the link props to the DSP-API.
-
- Args:
- res_iri: resource IRI
- stashed_resource: the resource from the stash
- resource_in_triplestore: the resource from the triplestore
- link_prop: the link property
- hash_to_value: the dictionary which stored the hashes and the KnoraStandoffXml with the corresponding texts
- id2iri_mapping: the dictionary that has the internal ids and IRIs to map
- nonapplied_xml_texts: the dictionary which contains the unprocessed resources
- verbose: how much information should be printed
- con: connection to the api
-
- Returns:
- the dictionary which contains the unprocessed resources
- """
- all_link_props_in_triplestore = resource_in_triplestore[link_prop.name]
-
- if not isinstance(all_link_props_in_triplestore, list):
- all_link_props_in_triplestore = [all_link_props_in_triplestore]
-
- for link_prop_in_triplestore in all_link_props_in_triplestore:
- nonapplied_xml_texts = _upload_single_link_xml_property(
- link_prop_in_triplestore=link_prop_in_triplestore,
- res_iri=res_iri,
- stashed_resource=stashed_resource,
- resource_in_triplestore=resource_in_triplestore,
- link_prop=link_prop,
- hash_to_value=hash_to_value,
- id2iri_mapping=id2iri_mapping,
- nonapplied_xml_texts=nonapplied_xml_texts,
- verbose=verbose,
- con=con,
- )
- return nonapplied_xml_texts
-
-
def upload_stashed_xml_texts(
verbose: bool,
id2iri_mapping: dict[str, str],
con: Connection,
- stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
-) -> dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]]:
+ stashed_xml_texts: StandoffStash,
+) -> StandoffStash | None:
"""
After all resources are uploaded, the stashed xml texts must be applied to their resources in DSP.
@@ -290,40 +119,106 @@ def upload_stashed_xml_texts(
print("Upload the stashed XML texts...")
logger.info("Upload the stashed XML texts...")
- nonapplied_xml_texts = stashed_xml_texts.copy()
- for stashed_resource, all_link_props in stashed_xml_texts.items():
- if stashed_resource.id not in id2iri_mapping:
+ not_uploaded: list[tuple[XMLResource, StandoffStashItem]] = []
+ for res_id, stash_items in stashed_xml_texts.res_2_stash_items.items():
+ res_iri = id2iri_mapping.get(res_id)
+ if not res_iri:
# resource could not be uploaded to DSP, so the stash cannot be uploaded either
# no action necessary: this resource will remain in nonapplied_xml_texts,
# which will be handled by the caller
continue
- res_iri = id2iri_mapping[stashed_resource.id]
+ xmlres: XMLResource = stashed_xml_texts.res_2_xmlres[res_id]
try:
resource_in_triplestore = try_network_action(con.get, route=f"/v2/resources/{quote_plus(res_iri)}")
except BaseError as err:
- log_unable_to_retrieve_resource(resource=stashed_resource, received_error=err)
+ _log_unable_to_retrieve_resource(resource=xmlres, received_error=err)
continue
- print(f' Upload XML text(s) of resource "{stashed_resource.id}"...')
- logger.info(f' Upload XML text(s) of resource "{stashed_resource.id}"...')
- for link_prop, hash_to_value in all_link_props.items():
- nonapplied_xml_texts = _upload_all_xml_texts_of_single_resource(
+ if verbose:
+ print(f' Upload XML text(s) of resource "{res_id}"...')
+ logger.debug(f' Upload XML text(s) of resource "{res_id}"...')
+ context = resource_in_triplestore["@context"]
+ for stash_item in stash_items:
+ value_iri = _get_value_iri(stash_item.prop_name, resource_in_triplestore, stash_item.uuid)
+ if not value_iri:
+ not_uploaded.append((xmlres, stash_item)) # does that even make sense to hold on to that one?
+ continue
+ success = _upload_stash_item(
+ stash_item=stash_item,
res_iri=res_iri,
- stashed_resource=stashed_resource,
- resource_in_triplestore=resource_in_triplestore,
- link_prop=link_prop,
- hash_to_value=hash_to_value,
+ res_type=xmlres.restype,
+ res_id=res_id,
+ value_iri=value_iri,
id2iri_mapping=id2iri_mapping,
- nonapplied_xml_texts=nonapplied_xml_texts,
- verbose=verbose,
con=con,
+ context=context,
)
+ if not success:
+ not_uploaded.append((xmlres, stash_item))
+ return StandoffStash.make(not_uploaded)
+
+
+def _get_value_iri(property_name: str, resource: dict[str, Any], uuid: str) -> str | None:
+    """
+    Find the value of a property whose XML text contains the given UUID.
+
+    Args:
+        property_name: key under which the value(s) are looked up in `resource`
+        resource: the resource as dict (per property either a single value or a list of values)
+        uuid: the UUID to search for in the "knora-api:textValueAsXml" of each value
+
+    Returns:
+        the "@id" of the first matching value, or None if the property or a matching value is missing
+    """
+    values = resource.get(property_name)
+    for val in values if isinstance(values, list) else [values]:
+        # an absent property (None) or a value without XML text cannot contain the UUID: skip, don't raise
+        if isinstance(val, dict) and uuid in (val.get("knora-api:textValueAsXml") or ""):
+            return val.get("@id") or None
+    return None
- # make a purged version of nonapplied_xml_texts, without empty entries
- nonapplied_xml_texts = purge_stashed_xml_texts(
- stashed_xml_texts=nonapplied_xml_texts,
- id2iri_mapping=id2iri_mapping,
+
+def _upload_stash_item(
+ stash_item: StandoffStashItem,
+ res_iri: str,
+ res_type: str,
+ res_id: str,
+ value_iri: str,
+ id2iri_mapping: dict[str, str],
+ con: Connection,
+ context: dict[str, str],
+) -> bool:
+ """
+ Upload a single stashed xml text to DSP.
+
+ Args:
+ stash_item: the stashed text value to upload
+ res_iri: the iri of the resource
+ res_type: the type of the resource
+ res_id: the internal id of the resource
+ value_iri: the iri of the value
+ id2iri_mapping: mapping of ids from the XML file to IRIs in DSP
+ con: connection to DSP
+ context: the JSON-LD context of the resource
+
+ Returns:
+ True, if the upload was successful, False otherwise
+ """
+ adjusted_text_value = stash_item.value.with_iris(id2iri_mapping)
+ jsondata = _create_XMLResource_json_object_to_update(
+ res_iri,
+ res_type,
+ stash_item.prop_name,
+ value_iri,
+ adjusted_text_value,
+ context,
)
- return nonapplied_xml_texts
+ try:
+ try_network_action(con.put, route="/v2/values", jsondata=jsondata)
+ except BaseError as err:
+ _log_unable_to_upload_xml_resource(err, res_id, stash_item.prop_name)
+ return False
+ logger.debug(f' Successfully uploaded xml text of "{stash_item.prop_name}"\n')
+ return True
def purge_stashed_xml_texts(
diff --git a/src/dsp_tools/utils/xmlupload/xmlupload.py b/src/dsp_tools/utils/xmlupload/xmlupload.py
index 70279e883..1c4bfee37 100644
--- a/src/dsp_tools/utils/xmlupload/xmlupload.py
+++ b/src/dsp_tools/utils/xmlupload/xmlupload.py
@@ -17,7 +17,6 @@
from dsp_tools.models.projectContext import ProjectContext
from dsp_tools.models.resource import KnoraStandoffXmlEncoder, ResourceInstance, ResourceInstanceFactory
from dsp_tools.models.sipi import Sipi
-from dsp_tools.models.value import KnoraStandoffXml
from dsp_tools.models.xmlpermission import XmlPermission
from dsp_tools.models.xmlproperty import XMLProperty
from dsp_tools.models.xmlresource import XMLResource
@@ -32,12 +31,13 @@
calculate_multimedia_file_size,
get_sipi_multimedia_information,
)
+from dsp_tools.utils.xmlupload.stash.stash_models import StandoffStash
from dsp_tools.utils.xmlupload.stash_circular_references import remove_circular_references
from dsp_tools.utils.xmlupload.upload_stashed_resptr_props import (
purge_stashed_resptr_props,
upload_stashed_resptr_props,
)
-from dsp_tools.utils.xmlupload.upload_stashed_xml_texts import purge_stashed_xml_texts, upload_stashed_xml_texts
+from dsp_tools.utils.xmlupload.upload_stashed_xml_texts import upload_stashed_xml_texts
from dsp_tools.utils.xmlupload.write_diagnostic_info import (
MetricRecord,
determine_save_location_of_diagnostic_info,
@@ -138,7 +138,7 @@ def xmlupload(
id2iri_mapping: dict[str, str] = {}
failed_uploads: list[str] = []
nonapplied_resptr_props = {}
- nonapplied_xml_texts = {}
+ nonapplied_xml_texts: StandoffStash | None = None
try:
id2iri_mapping, failed_uploads, metrics = _upload_resources(
resources=resources,
@@ -424,7 +424,7 @@ def _handle_upload_error(
err: BaseException,
id2iri_mapping: dict[str, str],
failed_uploads: list[str],
- stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ stashed_xml_texts: StandoffStash | None,
stashed_resptr_props: dict[XMLResource, dict[XMLProperty, list[str]]],
save_location: Path,
timestamp_str: str,
@@ -455,11 +455,6 @@ def _handle_upload_error(
)
logger.error("xmlupload must be aborted because of an error", exc_info=err)
- # only stashed properties of resources that already exist in DSP are of interest
- stashed_xml_texts = purge_stashed_xml_texts(
- stashed_xml_texts=stashed_xml_texts,
- id2iri_mapping=id2iri_mapping,
- )
stashed_resptr_props = purge_stashed_resptr_props(
stashed_resptr_props=stashed_resptr_props,
id2iri_mapping=id2iri_mapping,
@@ -540,7 +535,7 @@ def save_json_stashed_resptr_properties(
def save_json_stashed_text_properties(
- stashed_xml_texts: dict[XMLResource, dict[XMLProperty, dict[str, KnoraStandoffXml]]],
+ stashed_xml_texts: StandoffStash,
save_location: Path,
timestamp_str: str,
) -> str:
@@ -556,14 +551,10 @@ def save_json_stashed_text_properties(
Returns:
name of the JSON file
"""
- stashed_xml_texts_serializable = {
- resource.id: {_property.name: xml for _property, xml in res_dict.items()}
- for resource, res_dict in stashed_xml_texts.items()
- }
xml_filename = f"{save_location}/{timestamp_str}_stashed_text_properties.json"
with open(xml_filename, "x", encoding="utf-8") as file:
json.dump(
- obj=stashed_xml_texts_serializable,
+ obj=stashed_xml_texts,
fp=file,
ensure_ascii=False,
indent=4,
diff --git a/test/unittests/test_xmlupload/test_upload_stashed_xml_texts.py b/test/unittests/test_xmlupload/test_upload_stashed_xml_texts.py
index f288e636c..809c54055 100644
--- a/test/unittests/test_xmlupload/test_upload_stashed_xml_texts.py
+++ b/test/unittests/test_xmlupload/test_upload_stashed_xml_texts.py
@@ -5,7 +5,6 @@
import pytest
from dsp_tools.models.value import KnoraStandoffXml
-from dsp_tools.utils.xmlupload import upload_stashed_xml_texts
class TestXMLUploadStash(TestCase):
@@ -45,9 +44,7 @@ def test__replace_internal_ids_with_iris_one_link(self) -> None:
""
)
)
- returned_instance = upload_stashed_xml_texts._replace_internal_ids_with_iris(
- id2iri_mapping=test_id2iri, xml_with_id=one_link_KnoraStandoffXml, id_set={"r2_id"}
- )
+ returned_instance = one_link_KnoraStandoffXml.with_iris(test_id2iri)
expected_str = (
''
''
@@ -68,9 +65,7 @@ def test__replace_internal_ids_with_iris_three_links(self) -> None:
""
)
)
- returned_instance = upload_stashed_xml_texts._replace_internal_ids_with_iris(
- id2iri_mapping=test_id2iri, xml_with_id=three_link_KnoraStandoffXml, id_set={"r2_id", "r3_id"}
- )
+ returned_instance = three_link_KnoraStandoffXml.with_iris(test_id2iri)
expected_str = (
''
''
diff --git a/test/unittests/test_xmlupload/test_xmlupload.py b/test/unittests/test_xmlupload/test_xmlupload.py
index e7b72dd4e..753b22b40 100644
--- a/test/unittests/test_xmlupload/test_xmlupload.py
+++ b/test/unittests/test_xmlupload/test_xmlupload.py
@@ -9,10 +9,8 @@
from lxml import etree
from dsp_tools.models.exceptions import BaseError
-from dsp_tools.models.xmlresource import XMLResource
from dsp_tools.utils.xml_utils import parse_and_clean_xml_file
from dsp_tools.utils.xmlupload.ark2iri import convert_ark_v0_to_resource_iri
-from dsp_tools.utils.xmlupload.stash_circular_references import remove_circular_references
from dsp_tools.utils.xmlupload.write_diagnostic_info import (
_transform_server_url_to_foldername,
determine_save_location_of_diagnostic_info,
@@ -117,91 +115,6 @@ def test_convert_ark_v0_to_resource_iri(self) -> None:
):
convert_ark_v0_to_resource_iri("ark:/72163/080c-779b99+90a0c3f-6e")
- def test_remove_circular_references(self) -> None:
- # create a list of XMLResources from the test data file
- root = parse_and_clean_xml_file("testdata/xml-data/test-data-systematic.xml")
- resources = [XMLResource(x, "testonto") for x in root if x.tag == "resource"]
-
- # get the purged resources and the stashes from the function to be tested
- resources, stashed_xml_texts_original, stashed_resptr_props_original = remove_circular_references(
- resources=resources,
- verbose=False,
- )
-
- # make a list of all hashes from the stashed xml texts
- stashed_xml_texts_hashes = list()
- for res, propdict in stashed_xml_texts_original.items():
- for elem in propdict.values():
- for _hash in elem:
- stashed_xml_texts_hashes.append(_hash)
-
- # make a version of the stashes with the IDs from the XML file instead of the Python objects
- stashed_xml_texts = {
- res.id: {prop.name: [str(x) for x in d.values()] for prop, d in _dict.items()}
- for res, _dict in stashed_xml_texts_original.items()
- }
- stashed_resptr_props = {
- res.id: {prop.name: l for prop, l in _dict.items()} for res, _dict in stashed_resptr_props_original.items()
- }
-
- # hardcode the expected values
- stashed_xml_texts_expected = {
- "test_thing_1": {
- "testonto:hasRichtext": [
- "This text contains links to all resources: "
- 'test_thing_0 '
- 'test_thing_1 '
- 'image_thing_0 '
- 'compound_thing_0 '
- 'partof_thing_1 '
- 'partof_thing_2 '
- 'partof_thing_3 '
- 'document_thing_1 '
- 'text_thing_1 '
- 'zip_thing_1 '
- 'audio_thing_1 '
- 'test_thing_2 '
- 'test_thing_with_iri_1'
- ]
- },
- "test_thing_2": {
- "testonto:hasRichtext": [
- "This text contains links to all resources: "
- 'test_thing_0 '
- 'test_thing_1 '
- 'image_thing_0 '
- 'compound_thing_0 '
- 'partof_thing_1 '
- 'partof_thing_2 '
- 'partof_thing_3 '
- 'document_thing_1 '
- 'text_thing_1 '
- 'zip_thing_1 '
- 'audio_thing_1 '
- 'test_thing_2'
- ]
- },
- }
- stashed_resptr_props_expected = {
- "test_thing_0": {"testonto:hasTestThing": ["test_thing_1"]},
- "test_thing_1": {"testonto:hasResource": ["test_thing_2", "link_obj_1"]},
- }
-
- # check if the stashes are equal to the expected stashes
- self.assertDictEqual(stashed_resptr_props, stashed_resptr_props_expected)
- self.assertDictEqual(stashed_xml_texts, stashed_xml_texts_expected)
-
- # check if the stashed hashes can also be found at the correct position in the purged resources
- for res, propdict in stashed_xml_texts_original.items():
- for prop, hashdict in propdict.items():
- stashed_hashes = list(hashdict.keys())
- purged_res = resources[resources.index(res)]
- purged_prop = purged_res.properties[purged_res.properties.index(prop)]
- purged_hashes = [
- str(val.value) for val in purged_prop.values if str(val.value) in stashed_xml_texts_hashes
- ]
- self.assertListEqual(stashed_hashes, purged_hashes)
-
if __name__ == "__main__":
pytest.main([__file__])