-
Notifications
You must be signed in to change notification settings - Fork 4
/
upload_stashed_xml_texts.py
218 lines (193 loc) · 7.66 KB
/
upload_stashed_xml_texts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
from __future__ import annotations
from datetime import datetime
from typing import Any
from urllib.parse import quote_plus
from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem
from dsp_tools.models.exceptions import BaseError
from dsp_tools.utils.connection import Connection
from dsp_tools.utils.create_logger import get_logger
# Module-level logger, obtained from the project's get_logger helper for this module's __name__.
logger = get_logger(__name__)
def _log_unable_to_retrieve_resource(
    resource: str,
    received_error: BaseError,
) -> None:
    """
    Report that a resource could not be retrieved, so that its XML texts cannot be uploaded.

    The warning is printed to the console (together with the original error message)
    and passed to the module logger with exc_info=True.

    Args:
        resource: the resource id
        received_error: the error that was received when trying to retrieve the resource
    """
    # Reporting the cause of the failure is all that happens here;
    # dealing with the texts that could not be applied is left to the caller.
    reason = "because the resource cannot be retrieved from the DSP server."
    warning = f"Unable to upload XML texts of resource '{resource}', {reason}"
    print(f"{datetime.now()}: WARNING: {warning} Original error message: {received_error.message}")
    logger.warning(warning, exc_info=True)
def _log_unable_to_upload_xml_resource(
    received_error: BaseError,
    stashed_resource_id: str,
    prop_name: str,
) -> None:
    """
    Report that the xml text of a property could not be uploaded.

    The warning is printed to the console (together with the original error message)
    and passed to the module logger with exc_info=True.

    Args:
        received_error: the error that was received when trying to upload
        stashed_resource_id: id of the resource
        prop_name: name of the property
    """
    # Reporting the cause of the failure is all that happens here;
    # dealing with the texts that could not be applied is left to the caller.
    warning = f"Unable to upload the xml text of '{prop_name}' of resource '{stashed_resource_id}'."
    original_message = received_error.message
    print(f"{datetime.now()}: WARNING: {warning} Original error message: {original_message}")
    logger.warning(warning, exc_info=True)
def _create_XMLResource_json_object_to_update(
    res_iri: str,
    res_type: str,
    link_prop_name: str,
    value_iri: str,
    new_xmltext: FormattedTextValue,
    context: dict[str, str],
) -> dict[str, Any]:
    """
    Build the payload for an update request that replaces a text value with a new xml text.

    Args:
        res_iri: the iri of the resource
        res_type: the type of the resource
        link_prop_name: the name of the property that holds the text value
        value_iri: the iri of the value to be updated
        new_xmltext: the new xml text; its as_xml() result is put into the payload
        context: the JSON-LD context of the resource

    Returns:
        a dict with the resource identification, the updated text value and the context
    """
    # the text value itself, always declared with the standard standoff mapping
    standard_mapping = {"@id": "http://rdfh.ch/standoff/mappings/StandardMapping"}
    text_value: dict[str, Any] = {
        "@id": value_iri,
        "@type": "knora-api:TextValue",
        "knora-api:textValueAsXml": new_xmltext.as_xml(),
        "knora-api:textValueHasMapping": standard_mapping,
    }
    # the enclosing resource: identification, then the value under its property name, then the context
    return {
        "@id": res_iri,
        "@type": res_type,
        link_prop_name: text_value,
        "@context": context,
    }
def upload_stashed_xml_texts(
    verbose: bool,
    iri_resolver: IriResolver,
    con: Connection,
    stashed_xml_texts: StandoffStash,
) -> StandoffStash | None:
    """
    After all resources are uploaded, the stashed xml texts must be applied to their resources in DSP.

    Items that were uploaded successfully are removed from stashed_xml_texts in place.
    All other items (resource without IRI, resource not retrievable, value not found on the server,
    upload failed) are collected and handed to StandoffStash.make() to build the return value.

    Args:
        verbose: if True, the progress per resource is printed to the console
        iri_resolver: resolver to map ids from the XML file to IRIs in DSP
        con: connection to DSP
        stashed_xml_texts: all xml texts that have been stashed

    Returns:
        the xml texts that could not be uploaded
    """
    print(f"{datetime.now()}: Upload the stashed XML texts...")
    logger.info("Upload the stashed XML texts...")
    not_uploaded: list[StandoffStashItem] = []
    # iterate over a copy of the mapping, because emptied entries are popped from the original below
    for res_id, stash_items in stashed_xml_texts.res_2_stash_items.copy().items():
        res_iri = iri_resolver.get(res_id)
        if not res_iri:
            # resource could not be uploaded to DSP, so the stash cannot be uploaded either:
            # report all of its items as not uploaded
            not_uploaded.extend(stash_items)
            continue
        try:
            resource_in_triplestore = con.get(f"/v2/resources/{quote_plus(res_iri)}")
        except BaseError as err:
            _log_unable_to_retrieve_resource(resource=res_id, received_error=err)
            # without the resource, the value IRIs cannot be determined: report all items as not uploaded
            not_uploaded.extend(stash_items)
            continue
        if verbose:
            print(f"{datetime.now()}: Upload XML text(s) of resource '{res_id}'...")
        logger.info(f" Upload XML text(s) of resource '{res_id}'...")
        context = resource_in_triplestore["@context"]
        # iterate over a snapshot: the copy of the mapping above is shallow, so stash_items is the very list
        # that items are removed from below, and removing while iterating it would skip the following item
        for stash_item in list(stash_items):
            value_iri = _get_value_iri(stash_item.prop_name, resource_in_triplestore, stash_item.uuid)
            if not value_iri:
                not_uploaded.append(stash_item)
                continue
            success = _upload_stash_item(
                stash_item=stash_item,
                res_iri=res_iri,
                res_type=stash_item.res_type,
                res_id=res_id,
                value_iri=value_iri,
                iri_resolver=iri_resolver,
                con=con,
                context=context,
            )
            if success:
                stashed_xml_texts.res_2_stash_items[res_id].remove(stash_item)
            else:
                not_uploaded.append(stash_item)
        # drop the resource from the stash once all of its items have been uploaded
        if not stashed_xml_texts.res_2_stash_items[res_id]:
            stashed_xml_texts.res_2_stash_items.pop(res_id)
    return StandoffStash.make(not_uploaded)
def _get_value_iri(
    property_name: str,
    resource: dict[str, Any],
    uuid: str,
) -> str | None:
    """
    Find the IRI of the value of a property whose xml text contains the given UUID.

    Args:
        property_name: key under which the value(s) are looked up in the resource
        resource: the resource in which to look for the value
        uuid: the UUID to search for in the xml text of the value(s)

    Returns:
        the IRI of the first value whose xml text contains the UUID,
        or None if the resource does not have the property, or if none of its values contains the UUID
    """
    values_on_server = resource.get(property_name)
    if values_on_server is None:
        # the resource does not have this property at all, so there is nothing to search
        return None
    # the property holds either a single value or a list of values
    if not isinstance(values_on_server, list):
        values_on_server = [values_on_server]
    # get the IRI of the value that contains the UUID in its text
    for value in values_on_server:
        if not isinstance(value, dict):
            continue
        # values without an xml text cannot contain the UUID
        xml_text = value.get("knora-api:textValueAsXml")
        if isinstance(xml_text, str) and uuid in xml_text:
            return value.get("@id")
    # the value that contains the UUID in its text was not found:
    # no action necessary here, the caller handles the None
    return None
def _upload_stash_item(
    stash_item: StandoffStashItem,
    res_iri: str,
    res_type: str,
    res_id: str,
    value_iri: str,
    iri_resolver: IriResolver,
    con: Connection,
    context: dict[str, str],
) -> bool:
    """
    Send one stashed xml text to DSP as an update of an existing value.

    Args:
        stash_item: the stashed text value to upload
        res_iri: the iri of the resource
        res_type: the type of the resource
        res_id: the internal id of the resource, used in the warning if the upload fails
        value_iri: the iri of the value
        iri_resolver: resolver to map ids from the XML file to IRIs in DSP
        con: connection to DSP
        context: the JSON-LD context of the resource

    Returns:
        True, if the upload was successful, False otherwise
    """
    # let the text value replace its references with the help of the IRI resolver
    text_with_iris = stash_item.value.with_iris(iri_resolver)
    update_request = _create_XMLResource_json_object_to_update(
        res_iri=res_iri,
        res_type=res_type,
        link_prop_name=stash_item.prop_name,
        value_iri=value_iri,
        new_xmltext=text_with_iris,
        context=context,
    )
    try:
        con.put(route="/v2/values", data=update_request)
    except BaseError as upload_error:
        # a failed upload is reported, but not fatal: the caller decides what to do with this item
        _log_unable_to_upload_xml_resource(
            received_error=upload_error,
            stashed_resource_id=res_id,
            prop_name=stash_item.prop_name,
        )
        return False
    else:
        logger.debug(f' Successfully uploaded xml text of "{stash_item.prop_name}"')
        return True