Skip to content

Commit

Permalink
chore(xml_validation): turn into pytest (#815)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Feb 16, 2024
1 parent 652aa9f commit 516a2de
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 136 deletions.
2 changes: 1 addition & 1 deletion src/dsp_tools/cli/call_action.py
Expand Up @@ -21,7 +21,7 @@
from dsp_tools.commands.xmlupload.upload_config import DiagnosticsConfig, UploadConfig
from dsp_tools.commands.xmlupload.xmlupload import xmlupload
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_validation import validate_xml

logger = get_logger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Expand Up @@ -19,7 +19,7 @@
from dsp_tools.utils.date_util import is_full_date
from dsp_tools.utils.shared import check_notna, simplify_name
from dsp_tools.utils.uri_util import is_uri
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_validation import validate_xml

# ruff: noqa: E501 (line-too-long)

Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/xmlupload/read_validate_xml_file.py
Expand Up @@ -9,8 +9,8 @@
from dsp_tools.models.exceptions import UserError
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.iri_util import is_resource_iri
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_utils import parse_and_clean_xml_file
from dsp_tools.utils.xml_validation import validate_xml

logger = get_logger(__name__)

Expand Down
File renamed without changes.
133 changes: 0 additions & 133 deletions test/unittests/utils/test_validate_xml_against_schema.py

This file was deleted.

71 changes: 71 additions & 0 deletions test/unittests/utils/test_xml_validation_high_level.py
@@ -0,0 +1,71 @@
import pytest
from lxml import etree

from dsp_tools.models.exceptions import InputError
from dsp_tools.utils.xml_validation import validate_xml


def test_validate_xml_data_systematic() -> None:
    """The systematic test data file, passed as a path, validates successfully."""
    xml_path = "testdata/xml-data/test-data-systematic.xml"
    is_valid = validate_xml(input_file=xml_path)
    assert is_valid is True


def test_validate_xml_data_minimal() -> None:
    """The minimal test data, passed as an already-parsed tree, validates successfully."""
    parsed_tree = etree.parse(source="testdata/xml-data/test-data-minimal.xml")
    is_valid = validate_xml(input_file=parsed_tree)
    assert is_valid is True


def test_validate_xml_invalid_resource_tag_line_twelve() -> None:
    """A disallowed attribute on a resource raises InputError naming line 12."""
    # The pattern spans two lines of the error message; the separating newline
    # is a literal newline character rather than a regex escape.
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:"
        "\n"
        r"Line 12\: Element 'resource', attribute 'invalidtag'\: The attribute 'invalidtag' is not allowed\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/invalid-resource-tag.xml")


def test_validate_xml_invalid_resource_tag_problem() -> None:
    """XML tags in utf8-encoded text properties raise InputError listing lines 13-16."""
    header = (
        r"XML-tags are not allowed in text properties with encoding=utf8\.\n"
        r"The following resources of your XML file violate this rule:\n"
    )
    # One entry per offending line of the test file, all for the same resource/property.
    offending_entries = [
        rf" - line {line_no}: resource 'the_only_resource', property ':test'" for line_no in range(13, 17)
    ]
    expected_pattern = header + r"\n".join(offending_entries)
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/utf8-text-with-xml-tags.xml")


def test_validate_xml_data_duplicate_iri() -> None:
    """A duplicated resource IRI raises InputError naming the violated uniqueness constraint."""
    # `match` is applied as a regular expression, so every regex metacharacter
    # of the literal message is escaped -- including the dot in the host name,
    # which would otherwise match any character.
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['http://rdfh\.ch/4123/54SYvWF0QUW6a'\] "
        r"in unique identity-constraint 'IRI_attribute_of_resource_must_be_unique'\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/duplicate-iri.xml")


def test_validate_xml_duplicate_ark() -> None:
    """A duplicated resource ARK raises InputError naming the violated uniqueness constraint."""
    # `match` is applied as a regular expression, so every regex metacharacter
    # of the literal message is escaped -- including the dot inside the ARK,
    # which would otherwise match any character.
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['ark\:/72163/4123-31ec6eab334-a\.2022829'\] "
        r"in unique identity-constraint 'ARK_attribute_of_resource_must_be_unique'\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/duplicate-ark.xml")


def test_validate_xml_empty_label() -> None:
    """An empty `label` attribute on a resource raises InputError about the minimum length."""
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 11\: Element 'resource', attribute 'label'\: \[facet 'minLength'\] "
        r"The value '' has a length of '0'; this underruns the allowed minimum length of '1'\."
    )
    invalid_file = "testdata/invalid-testdata/xml-data/empty-label.xml"
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file=invalid_file)


# Allow running this test module directly: hands this file to pytest.
if __name__ == "__main__":
    pytest.main([__file__])
78 changes: 78 additions & 0 deletions test/unittests/utils/test_xml_validation_low_level.py
@@ -0,0 +1,78 @@
import pytest
from lxml import etree

from dsp_tools.utils.xml_validation import _find_xml_tags_in_simple_text_elements


class TestFindXMLTagsInUTF8:
    """Tests for `_find_xml_tags_in_simple_text_elements` on utf8-encoded text values."""

    def test_find_xml_tags_in_simple_text_elements_all_good(self) -> None:
        """Texts with escaped special characters that the check must accept yield (True, "")."""
        # The special characters must be written as XML entities: a raw "<" or
        # "&" inside element content is not well-formed XML, so etree.fromstring()
        # would fail before the function under test is ever reached.
        allowed_html_escapes = [
            "(&lt;2cm) (&gt;10cm)",
            "text &lt; text/&gt;",
            "text &lt; text&gt; &amp; text",
            "text &lt;text text &gt; text",
            'text &lt; text text="text"&gt; text',
            'text &lt;text text="text" &gt; text',
        ]
        for txt in allowed_html_escapes:
            xml = f"""
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":name">
                        <text encoding="utf8">{txt}</text>
                    </text-prop>
                </resource>
            </knora>
            """
            all_good, msg = _find_xml_tags_in_simple_text_elements(etree.fromstring(xml))
            # Name the offending sample so a failure inside the loop is identifiable.
            assert all_good is True, f"unexpectedly rejected: {txt!r}"
            assert msg == "", f"unexpected message for: {txt!r}"

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes(self) -> None:
        """An escaped tag with an attribute is reported with line, resource id and property name."""
        # The literal starts with a newline, so the <text> element sits on line 5.
        test_ele = etree.fromstring(
            """
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":name">
                        <text encoding="utf8">&lt;tag s="t"&gt;</text>
                    </text-prop>
                </resource>
            </knora>
            """
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':name'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes_two(self) -> None:
        """An escaped opening/closing tag pair is reported with line, resource id and property name."""
        # The literal starts with a newline, so the <text> element sits on line 5.
        test_ele = etree.fromstring(
            """
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":propName">
                        <text encoding="utf8">&lt;em&gt;text&lt;/em&gt;</text>
                    </text-prop>
                </resource>
            </knora>
            """
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':propName'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg


# Allow running this test module directly: hands this file to pytest.
if __name__ == "__main__":
    pytest.main([__file__])

0 comments on commit 516a2de

Please sign in to comment.