Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(xml_validation): turn into pytest (#815)
- Loading branch information
1 parent
652aa9f
commit 516a2de
Showing
7 changed files
with
152 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
133 changes: 0 additions & 133 deletions
133
test/unittests/utils/test_validate_xml_against_schema.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import pytest | ||
from lxml import etree | ||
|
||
from dsp_tools.models.exceptions import InputError | ||
from dsp_tools.utils.xml_validation import validate_xml | ||
|
||
|
||
def test_validate_xml_data_systematic() -> None:
    """Validating the systematic test data, given as a file path, returns True."""
    xml_path = "testdata/xml-data/test-data-systematic.xml"
    is_valid = validate_xml(input_file=xml_path)
    assert is_valid is True
|
||
|
||
def test_validate_xml_data_minimal() -> None:
    """Validating the minimal test data, given as an already parsed tree, returns True."""
    parsed_tree = etree.parse(source="testdata/xml-data/test-data-minimal.xml")
    is_valid = validate_xml(input_file=parsed_tree)
    assert is_valid is True
|
||
|
||
def test_validate_xml_invalid_resource_tag_line_twelve() -> None:
    """An attribute that is not allowed on <resource> raises an InputError naming line 12."""
    # The line break is spelled out as "\n" and the indentation of the second message line
    # is matched with "\s*", so that the pattern does not depend on how this file is indented
    # (a triple-quoted pattern would bake the source indentation into the regex).
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r"\s*Line 12\: Element 'resource', attribute 'invalidtag'\: The attribute 'invalidtag' is not allowed\."
    )
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file="testdata/invalid-testdata/xml-data/invalid-resource-tag.xml")
|
||
|
||
def test_validate_xml_invalid_resource_tag_problem() -> None:
    """The file with XML tags in utf8 texts raises an InputError listing lines 13 to 16."""
    expected_pattern = (
        r"XML-tags are not allowed in text properties with encoding=utf8\.\n"
        r"The following resources of your XML file violate this rule:\n"
        r" - line 13: resource 'the_only_resource', property ':test'\n"
        r" - line 14: resource 'the_only_resource', property ':test'\n"
        r" - line 15: resource 'the_only_resource', property ':test'\n"
        r" - line 16: resource 'the_only_resource', property ':test'"
    )
    offending_file = "testdata/invalid-testdata/xml-data/utf8-text-with-xml-tags.xml"
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file=offending_file)
|
||
|
||
def test_validate_xml_data_duplicate_iri() -> None:
    """The duplicate-iri file raises an InputError reporting the duplicated IRI on line 19."""
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['http://rdfh.ch/4123/54SYvWF0QUW6a'\] "
        r"in unique identity-constraint 'IRI_attribute_of_resource_must_be_unique'\."
    )
    offending_file = "testdata/invalid-testdata/xml-data/duplicate-iri.xml"
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file=offending_file)
|
||
|
||
def test_validate_xml_duplicate_ark() -> None:
    """The duplicate-ark file raises an InputError reporting the duplicated ARK on line 19."""
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 19\: Element 'resource'\: Duplicate key-sequence \['ark\:/72163/4123-31ec6eab334-a.2022829'\] "
        r"in unique identity-constraint 'ARK_attribute_of_resource_must_be_unique'\."
    )
    offending_file = "testdata/invalid-testdata/xml-data/duplicate-ark.xml"
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file=offending_file)
|
||
|
||
def test_validate_xml_empty_label() -> None:
    """The empty-label file raises an InputError reporting the minLength violation on line 11."""
    expected_pattern = (
        r"The XML file cannot be uploaded due to the following validation error\(s\)\:\n"
        r" Line 11\: Element 'resource', attribute 'label'\: \[facet 'minLength'\] "
        r"The value '' has a length of '0'; this underruns the allowed minimum length of '1'\."
    )
    offending_file = "testdata/invalid-testdata/xml-data/empty-label.xml"
    with pytest.raises(InputError, match=expected_pattern):
        validate_xml(input_file=offending_file)
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: hand this very file to pytest.
    pytest.main(args=[__file__])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import pytest | ||
from lxml import etree | ||
|
||
from dsp_tools.utils.xml_validation import _find_xml_tags_in_simple_text_elements | ||
|
||
|
||
class TestFindXMLTagsInUTF8:
    """Tests for `_find_xml_tags_in_simple_text_elements` on `<text encoding="utf8">` elements."""

    def test_find_xml_tags_in_simple_text_elements_all_good(self) -> None:
        """Each of the listed texts yields `(True, "")`."""
        # "<", ">" and "&" must be written as XML entities inside the fixtures:
        # written raw, they make the document malformed and etree.fromstring() raises
        # before the function under test is ever called.
        allowed_html_escapes = [
            "(&lt;2cm) (&gt;10cm)",
            "text &lt; text/&gt;",
            "text &lt; text&gt; &amp; text",
            "text &lt;text text &gt; text",
            'text &lt; text text="text"&gt; text',
            'text &lt;text text="text" &gt; text',
        ]
        utf8_texts_with_allowed_html_escapes = [
            f"""
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":name">
                        <text encoding="utf8">{txt}</text>
                    </text-prop>
                </resource>
            </knora>
            """
            for txt in allowed_html_escapes
        ]
        for xml in utf8_texts_with_allowed_html_escapes:
            all_good, msg = _find_xml_tags_in_simple_text_elements(etree.fromstring(xml))
            assert all_good is True
            assert msg == ""

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes(self) -> None:
        """An entity-escaped opening tag with an attribute is reported for line 5."""
        # The tag is entity-escaped, so that the parser delivers it as text content of <text>.
        # The fixture starts with a line break, hence <text> sits on line 5 of the document.
        test_ele = etree.fromstring(
            """
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":name">
                        <text encoding="utf8">&lt;tag s="t"&gt;</text>
                    </text-prop>
                </resource>
            </knora>
            """
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':name'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg

    def test_find_xml_tags_in_simple_text_elements_forbidden_escapes_two(self) -> None:
        """An entity-escaped pair of opening and closing tags is reported for line 5."""
        # Entity-escaped for the same reason as above: unescaped, <em> would be parsed
        # as a child element instead of being part of the text content.
        test_ele = etree.fromstring(
            """
            <knora shortcode="4123" default-ontology="testonto">
                <resource label="label" restype=":restype" id="id">
                    <text-prop name=":propName">
                        <text encoding="utf8">&lt;em&gt;text&lt;/em&gt;</text>
                    </text-prop>
                </resource>
            </knora>
            """
        )
        expected_msg = (
            "XML-tags are not allowed in text properties with encoding=utf8.\n"
            "The following resources of your XML file violate this rule:\n"
            " - line 5: resource 'id', property ':propName'"
        )
        all_good, res_msg = _find_xml_tags_in_simple_text_elements(test_ele)
        assert all_good is False
        assert res_msg == expected_msg
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: hand this very file to pytest.
    pytest.main(args=[__file__])