Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(xml_validation): turn into pytest #815

Merged
merged 5 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/dsp_tools/cli/call_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dsp_tools.commands.xmlupload.upload_config import DiagnosticsConfig, UploadConfig
from dsp_tools.commands.xmlupload.xmlupload import xmlupload
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_validation import validate_xml

logger = get_logger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from dsp_tools.utils.date_util import is_full_date
from dsp_tools.utils.shared import check_notna, simplify_name
from dsp_tools.utils.uri_util import is_uri
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_validation import validate_xml

# ruff: noqa: E501 (line-too-long)

Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/xmlupload/read_validate_xml_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from dsp_tools.models.exceptions import UserError
from dsp_tools.utils.create_logger import get_logger
from dsp_tools.utils.iri_util import is_resource_iri
from dsp_tools.utils.validate_data_xml import validate_xml
from dsp_tools.utils.xml_utils import parse_and_clean_xml_file
from dsp_tools.utils.xml_validation import validate_xml

logger = get_logger(__name__)

Expand Down
133 changes: 0 additions & 133 deletions test/unittests/utils/test_validate_xml_against_schema.py

This file was deleted.

71 changes: 71 additions & 0 deletions test/unittests/utils/test_xml_validation_high_level.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import pytest
from lxml import etree

from dsp_tools.models.exceptions import InputError
from dsp_tools.utils.xml_validation import validate_xml


def test_validate_xml_data_systematic() -> None:
    """validate_xml must return True when called with the path of the systematic test data."""
    outcome = validate_xml(input_file="testdata/xml-data/test-data-systematic.xml")
    assert outcome is True


def test_validate_xml_data_minimal() -> None:
    """validate_xml must return True when called with an already parsed tree of the minimal test data."""
    parsed_tree = etree.parse(source="testdata/xml-data/test-data-minimal.xml")
    assert validate_xml(input_file=parsed_tree) is True


def test_validate_xml_invalid_resource_tag_line_twelve() -> None:
    """A disallowed attribute on a resource element must raise an InputError that names line 12."""
    # The pattern spans two physical lines, so the line break (and anything at the start of the
    # second line) is part of the regex itself.
    # NOTE(review): whitespace inside this literal is significant - confirm it matches the
    # indentation of the message that validate_xml actually emits.
    expected_pattern = r"""The XML file cannot be uploaded due to the following validation error\(s\)\:
Line 12\: Element 'resource', attribute 'invalidtag'\: The attribute 'invalidtag' is not allowed\."""
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/invalid-resource-tag.xml")


def test_validate_xml_invalid_resource_tag_problem() -> None:
    """XML tags inside utf8-encoded text properties must raise an InputError listing each offending line."""
    # One regex, assembled from adjacent raw-string pieces: a two-line header followed by
    # one entry per reported line (13 to 16) of the input file.
    expected_pattern = (
        r"XML-tags are not allowed in text properties with encoding=utf8\.\n"
        r"The following resources of your XML file violate this rule:\n"
        r" - line 13: resource 'the_only_resource', property ':test'\n"
        r" - line 14: resource 'the_only_resource', property ':test'\n"
        r" - line 15: resource 'the_only_resource', property ':test'\n"
        r" - line 16: resource 'the_only_resource', property ':test'"
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/utf8-text-with-xml-tags.xml")


def test_validate_xml_data_duplicate_iri() -> None:
    """Two resources sharing the same IRI must raise an InputError citing the uniqueness constraint."""
    # `match` is applied as a regular expression, so every literal "." is escaped, including
    # the one inside the IRI host name; an unescaped dot would accept any character there.
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['http://rdfh\.ch/4123/54SYvWF0QUW6a'\] "
        r"in unique identity-constraint 'IRI_attribute_of_resource_must_be_unique'\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/duplicate-iri.xml")


def test_validate_xml_duplicate_ark() -> None:
    """Two resources sharing the same ARK must raise an InputError citing the uniqueness constraint."""
    # `match` is applied as a regular expression, so every literal "." is escaped, including
    # the one inside the ARK identifier; an unescaped dot would accept any character there.
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['ark\:/72163/4123-31ec6eab334-a\.2022829'\] "
        r"in unique identity-constraint 'ARK_attribute_of_resource_must_be_unique'\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/duplicate-ark.xml")


def test_validate_xml_empty_label() -> None:
    """A resource with an empty label must raise an InputError reporting the minLength violation."""
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 11\: Element 'resource', attribute 'label'\: \[facet 'minLength'\] "
        r"The value '' has a length of '0'; this underruns the allowed minimum length of '1'\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/empty-label.xml")


if __name__ == "__main__":
    # Run only the tests of this module when the file is executed directly as a script.
    cli_args = [__file__]
    pytest.main(cli_args)
78 changes: 78 additions & 0 deletions test/unittests/utils/test_xml_validation_low_level.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pytest
from lxml import etree

from dsp_tools.utils.xml_validation import _find_xml_tags_in_simple_text_elements


class TestFindXMLTagsInUTF8:
    """Tests for _find_xml_tags_in_simple_text_elements on text elements with encoding="utf8"."""

    def test_find_xml_tags_in_simple_text_elements_all_good(self) -> None:
        """Escaped angle brackets and ampersands that do not form a tag must be accepted."""
        # These snippets are interpolated into XML source below, so "<", ">" and "&" have to be
        # written as entity references. Raw characters would make the document not well-formed,
        # and etree.fromstring() would fail before the function under test is ever called.
        allowed_html_escapes = [
            "(&lt;2cm) (&gt;10cm)",
            "text &lt; text/&gt;",
            "text &lt; text&gt; &amp; text",
            "text &lt;text text &gt; text",
            'text &lt; text text="text"&gt; text',
            'text &lt;text text="text" &gt; text',
        ]
        utf8_texts_with_allowed_html_escapes = [
            f"""
<knora shortcode="4123" default-ontology="testonto">
<resource label="label" restype=":restype" id="id">
<text-prop name=":name">
<text encoding="utf8">{txt}</text>
</text-prop>
</resource>
</knora>
"""
            for txt in allowed_html_escapes
        ]
        for xml in utf8_texts_with_allowed_html_escapes:
            all_good, msg = _find_xml_tags_in_simple_text_elements(etree.fromstring(xml))
            assert all_good is True
            assert msg == ""

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes(self) -> None:
        """An escaped opening tag with an attribute must be reported with line, resource and property."""
        # The literal starts with a line break, so the <text> element sits on line 5 of the
        # parsed document; the expected message below relies on that.
        test_ele = etree.fromstring(
            """
<knora shortcode="4123" default-ontology="testonto">
<resource label="label" restype=":restype" id="id">
<text-prop name=":name">
<text encoding="utf8">&lt;tag s="t"&gt;</text>
</text-prop>
</resource>
</knora>
"""
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':name'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes_two(self) -> None:
        """An escaped open/close tag pair around text must be reported with line, resource and property."""
        # The literal starts with a line break, so the <text> element sits on line 5 of the
        # parsed document; the expected message below relies on that.
        test_ele = etree.fromstring(
            """
<knora shortcode="4123" default-ontology="testonto">
<resource label="label" restype=":restype" id="id">
<text-prop name=":propName">
<text encoding="utf8">&lt;em&gt;text&lt;/em&gt;</text>
</text-prop>
</resource>
</knora>
"""
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':propName'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg


if __name__ == "__main__":
    # Run only the tests of this module when the file is executed directly as a script.
    cli_args = [__file__]
    pytest.main(cli_args)