Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

chore(test): turn xml_utils.py into pytest #828

Merged
merged 7 commits into from
Feb 26, 2024
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
97 changes: 54 additions & 43 deletions test/unittests/utils/test_xml_utils.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,67 @@
import unittest
from typing import Any

import pytest
import regex
from lxml import etree

from dsp_tools.utils.xml_utils import parse_and_clean_xml_file

# ruff: noqa: PT009 (pytest-unittest-assertion) (remove this line when pytest is used instead of unittest)

@pytest.fixture()
def data_systematic_unclean() -> Any:
    """Provide the systematic test data as parsed by ``etree.parse``, without passing it through the cleaning step."""
    raw_tree = etree.parse("testdata/xml-data/test-data-systematic.xml")
    return raw_tree
Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved


@pytest.fixture()
def data_systematic_cleaned() -> etree._Element:
    """Provide the result of ``parse_and_clean_xml_file`` when it is given the path of the systematic test data."""
    cleaned_root = parse_and_clean_xml_file("testdata/xml-data/test-data-systematic.xml")
    return cleaned_root


def clean_resulting_tree(tree: etree._Element) -> str:
    """
    Serialize a tree into a string that is suitable for comparison.

    Args:
        tree: the element to serialize

    Returns:
        the serialized tree, with all newlines removed and every run of spaces collapsed into a single space
    """
    serialized = etree.tostring(tree, encoding=str)
    without_newlines = regex.sub("\n", "", serialized)
    return regex.sub(" +", " ", without_newlines)


class TestParseAndCleanXML(unittest.TestCase):
    """unittest-based checks of ``parse_and_clean_xml_file``, run against the systematic test data file."""

    def test_parse_and_clean_xml_file_same_regardless_of_input(self) -> None:
        """A file path and an already parsed tree must lead to the same cleaned output."""
        parsed_tree = etree.parse("testdata/xml-data/test-data-systematic.xml")
        result_from_path = parse_and_clean_xml_file("testdata/xml-data/test-data-systematic.xml")
        result_from_tree = parse_and_clean_xml_file(parsed_tree)
        # compare the whitespace-normalized serializations rather than the element objects
        normalized_from_path = clean_resulting_tree(result_from_path)
        normalized_from_tree = clean_resulting_tree(result_from_tree)
        self.assertEqual(
            normalized_from_path,
            normalized_from_tree,
            msg="The output must be equal, regardless if the input is a path or parsed.",
        )

    def test_annotations_regions_links_before(self) -> None:
        """The unprocessed file must contain at least one tag matching annotation/region/link."""
        parsed_tree = etree.parse("testdata/xml-data/test-data-systematic.xml")
        matching_elements = []
        for element in parsed_tree.iter():
            if regex.search("annotation|region|link", str(element.tag)):
                matching_elements.append(element)
        self.assertGreater(len(matching_elements), 0)

    def test_annotations_regions_links_after(self) -> None:
        """After cleaning, no tag matching annotation/region/link may be left."""
        cleaned_root = parse_and_clean_xml_file("testdata/xml-data/test-data-systematic.xml")
        matching_elements = []
        for element in cleaned_root.iter():
            if regex.search("annotation|region|link", str(element.tag)):
                matching_elements.append(element)
        self.assertEqual(
            len(matching_elements),
            0,
            msg="The tags <annotation>, <region>, and <link> must be transformed to their technically correct form "
            '<resource restype="Annotation/Region/LinkObj">',
        )

    def test_comment_removal(self) -> None:
        """After cleaning, the tree must not contain any comment nodes."""
        cleaned_root = parse_and_clean_xml_file("testdata/xml-data/test-data-systematic.xml")
        remaining_comments = []
        for node in cleaned_root.iter():
            if isinstance(node, etree._Comment):
                remaining_comments.append(node)
        self.assertEqual(
            len(remaining_comments),
            0,
            msg="properties that are commented out would break the the constructor of the class XMLProperty, "
            "if they are not removed in the parsing process",
        )
def test_parse_and_clean_xml_file_same_regardless_of_input(
    data_systematic_unclean: Any, data_systematic_cleaned: etree._Element
) -> None:
    """
    Check that cleaning yields the same output for a file path and for an already parsed tree.

    Args:
        data_systematic_unclean: fixture providing the test data parsed with ``etree.parse``
        data_systematic_cleaned: fixture providing the test data cleaned from its file path
    """
    from_tree = parse_and_clean_xml_file(data_systematic_unclean)
    # compare the whitespace-normalized serializations rather than the element objects
    cleaned_from_file = clean_resulting_tree(data_systematic_cleaned)
    cleaned_from_tree = clean_resulting_tree(from_tree)
    assert (
        cleaned_from_file == cleaned_from_tree
    ), "The output must be equal, regardless if the input is a path or parsed."


def test_annotations_regions_links_before(data_systematic_unclean: Any) -> None:
    """The unprocessed test data must contain exactly 5 elements whose tag matches annotation/region/link."""
    matching_elements = []
    for element in data_systematic_unclean.iter():
        if regex.search("annotation|region|link", str(element.tag)):
            matching_elements.append(element)
    assert len(matching_elements) == 5

Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved

def test_annotations_regions_links_after(data_systematic_cleaned: etree._Element) -> None:
    """The cleaned test data must not contain any element whose tag matches annotation/region/link."""
    failure_message = (
        "The tags <annotation>, <region>, and <link> must be transformed "
        'to their technically correct form <resource restype="Annotation/Region/LinkObj">'
    )
    leftover_elements = []
    for element in data_systematic_cleaned.iter():
        if regex.search("annotation|region|link", str(element.tag)):
            leftover_elements.append(element)
    assert len(leftover_elements) == 0, failure_message


def test_comment_removal_before(data_systematic_unclean: Any) -> None:
    """The unprocessed test data must contain exactly 7 comment nodes."""
    found_comments = []
    for node in data_systematic_unclean.iter():
        if isinstance(node, etree._Comment):
            found_comments.append(node)
    assert len(found_comments) == 7


def test_comment_removal_after(data_systematic_cleaned: etree._Element) -> None:
    """The cleaned test data must not contain any comment nodes."""
    failure_message = (
        "properties that are commented out would break the the constructor of the class XMLProperty, "
        "if they are not removed in the parsing process"
    )
    remaining_comments = []
    for node in data_systematic_cleaned.iter():
        if isinstance(node, etree._Comment):
            remaining_comments.append(node)
    assert len(remaining_comments) == 0, failure_message


# Allow running this test module directly as a script: hand this file over to pytest.
if __name__ == "__main__":
    this_module = __file__
    pytest.main([this_module])