diff --git a/src/dsp_tools/commands/xmlupload/read_validate_xml_file.py b/src/dsp_tools/commands/xmlupload/read_validate_xml_file.py
index 8036ab07a..1aa8cb0ac 100644
--- a/src/dsp_tools/commands/xmlupload/read_validate_xml_file.py
+++ b/src/dsp_tools/commands/xmlupload/read_validate_xml_file.py
@@ -3,10 +3,12 @@
from pathlib import Path
from typing import Any, Union
+import regex
from lxml import etree
from dsp_tools.models.exceptions import UserError
from dsp_tools.utils.create_logger import get_logger
+from dsp_tools.utils.iri_util import is_resource_iri
from dsp_tools.utils.shared import validate_xml_against_schema
from dsp_tools.utils.xml_utils import parse_and_clean_xml_file
@@ -34,15 +36,62 @@ def validate_and_parse_xml_file(
"""
validate_xml_against_schema(input_file=input_file)
root = parse_and_clean_xml_file(input_file=input_file)
+ _check_if_link_targets_exist(root)
if not preprocessing_done:
- check_if_bitstreams_exist(root=root, imgdir=imgdir)
+ _check_if_bitstreams_exist(root=root, imgdir=imgdir)
shortcode = root.attrib["shortcode"]
default_ontology = root.attrib["default-ontology"]
logger.info(f"Validated and parsed the XML file. {shortcode=:} and {default_ontology=:}")
return default_ontology, root, shortcode
-def check_if_bitstreams_exist(
+def _check_if_link_targets_exist(root: etree._Element) -> None:
+ """
+ Make sure that all targets of links (resptr and salsah-links)
+ are either IRIsl or IDs that exist in the present XML file.
+
+ Args:
+ root: parsed XML file
+
+ Raises:
+ UserError: if a link target does not exist in the XML file
+ """
+ resptr_errors = _check_if_resptr_targets_exist(root)
+ salsah_errors = _check_if_salsah_targets_exist(root)
+ errors = resptr_errors + salsah_errors
+ if errors:
+ sep = "\n - "
+ msg = f"It is not possible to upload the XML file, because it contains invalid links:{sep}" + sep.join(errors)
+ raise UserError(msg)
+
+
+def _check_if_resptr_targets_exist(root: etree._Element) -> list[str]:
+ link_values = [x for x in root.iter() if x.tag == "resptr"]
+ resource_ids = [x.attrib["id"] for x in root.iter() if x.tag == "resource"]
+ invalid_link_values = [x for x in link_values if x.text not in resource_ids]
+ invalid_link_values = [x for x in invalid_link_values if not is_resource_iri(str(x.text))]
+ errors = []
+ for inv in invalid_link_values:
+ prop_name = next(inv.iterancestors(tag="resptr-prop")).attrib["name"]
+ res_id = next(inv.iterancestors(tag="resource")).attrib["id"]
+ errors.append(f"Resource '{res_id}', property '{prop_name}' has an invalid link target '{inv.text}'")
+ return errors
+
+
+def _check_if_salsah_targets_exist(root: etree._Element) -> list[str]:
+ link_values = [x for x in root.iter() if x.tag == "a"]
+ resource_ids = [x.attrib["id"] for x in root.iter() if x.tag == "resource"]
+ invalid_link_values = [x for x in link_values if regex.sub(r"IRI:|:IRI", "", x.attrib["href"]) not in resource_ids]
+ invalid_link_values = [x for x in invalid_link_values if not is_resource_iri(x.attrib["href"])]
+ errors = []
+ for inv in invalid_link_values:
+ prop_name = next(inv.iterancestors(tag="text-prop")).attrib["name"]
+ res_id = next(inv.iterancestors(tag="resource")).attrib["id"]
+ errors.append(f"Resource '{res_id}', property '{prop_name}' has an invalid link target '{inv.attrib['href']}'")
+ return errors
+
+
+def _check_if_bitstreams_exist(
root: etree._Element,
imgdir: str,
) -> None:
diff --git a/test/unittests/commands/xmlupload/test_read_validate_xml_file.py b/test/unittests/commands/xmlupload/test_read_validate_xml_file.py
new file mode 100644
index 000000000..be8d7420c
--- /dev/null
+++ b/test/unittests/commands/xmlupload/test_read_validate_xml_file.py
@@ -0,0 +1,98 @@
+from lxml import etree
+
+from dsp_tools.commands.xmlupload.read_validate_xml_file import (
+ _check_if_resptr_targets_exist,
+ _check_if_salsah_targets_exist,
+)
+
+
+def test_check_if_resptr_targets_exist() -> None:
+ """Check correct input"""
+ xml = """
+
+
+ resource2
+
+
+ resource1
+
+
+ """
+ root = etree.fromstring(xml)
+ errors_returned = _check_if_resptr_targets_exist(root)
+ assert not errors_returned
+
+
+def test_check_if_resptr_targets_exist_invalid() -> None:
+ """Check invalid input"""
+ xml = """
+
+
+ resource3
+
+
+ resource4
+
+
+ """
+ root = etree.fromstring(xml)
+ errors_returned = _check_if_resptr_targets_exist(root)
+ errors_expected = [
+ "Resource 'resource1', property 'resptr1' has an invalid link target 'resource3'",
+ "Resource 'resource2', property 'resptr2' has an invalid link target 'resource4'",
+ ]
+ assert errors_returned == errors_expected
+
+
+def test_check_if_salsah_targets_exist() -> None:
+ """Check correct input"""
+ xml = """
+
+
+
+
+ resource2
+
+
+
+
+
+
+ resource1
+
+
+
+
+ """
+ root = etree.fromstring(xml)
+ errors_returned = _check_if_salsah_targets_exist(root)
+ assert not errors_returned
+
+
+def test_check_if_salsah_targets_exist_invalid() -> None:
+ """Check invalid input"""
+ xml = """
+
+
+
+
+ resource3
+
+
+
+
+
+
+ resource4
+
+
+
+
+ """
+ root = etree.fromstring(xml)
+ errors_returned = _check_if_salsah_targets_exist(root)
+ errors_expected = [
+ "Resource 'resource1', property 'text1' has an invalid link target 'IRI:resource3:IRI'",
+ "Resource 'resource2', property 'text2' has an invalid link target 'IRI:resource4:IRI'",
+ ]
+ assert errors_returned == errors_expected