Skip to content

Commit

Permalink
feat(excel2xml): allow for single tags in XML text (DEV-3427) (#885)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Mar 20, 2024
1 parent 7818325 commit c73b126
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Expand Up @@ -1416,9 +1416,9 @@ def _escape_reserved_chars(text: str) -> str:
"code",
]
allowed_tags_regex = "|".join(allowed_tags)
lookahead = rf"(?!/?({allowed_tags_regex})>)"
lookbehind = rf"(?<!</?({allowed_tags_regex}))"
lookahead = rf"(?!/?({allowed_tags_regex})/?>)"
illegal_lt = rf"<{lookahead}"
lookbehind = rf"(?<!</?({allowed_tags_regex})/?)"
illegal_gt = rf"{lookbehind}>"
illegal_amp = r"&(?![#a-zA-Z0-9]+;)"
text = regex.sub(illegal_lt, "&lt;", text)
Expand Down
37 changes: 37 additions & 0 deletions test/unittests/commands/excel2xml/test_excel2xml_lib.py
Expand Up @@ -14,6 +14,7 @@
from lxml import etree

from dsp_tools import excel2xml
from dsp_tools.commands.excel2xml.excel2xml_lib import _escape_reserved_chars
from dsp_tools.models.exceptions import BaseError

# ruff: noqa: PT009 (pytest-unittest-assertion) (remove this line when pytest is used instead of unittest)
Expand Down Expand Up @@ -823,6 +824,42 @@ def test_create_json_list_mapping(self) -> None:
self.assertDictEqual(testlist_mapping_returned, testlist_mapping_expected)


class TestEscapedChars:
    """Expectations for `_escape_reserved_chars` on text with tags, stray angle brackets and ampersands."""

    def test_single_br(self) -> None:
        """A self-closing `<br/>` tag is expected to come back unchanged."""
        original = "Text <br/> text after"
        assert _escape_reserved_chars(original) == original

    def test_single_br_with_other(self) -> None:
        """A `>` directly following `<br/>` is expected to be escaped while the tag is kept."""
        original = "Text <br/>> text after"
        escaped = "Text <br/>&gt; text after"
        assert _escape_reserved_chars(original) == escaped

    def test_wrong_single_br(self) -> None:
        """A malformed `<br//>` is expected to have both angle brackets escaped."""
        original = "Text <br//> text after"
        escaped = "Text &lt;br//&gt; text after"
        assert _escape_reserved_chars(original) == escaped

    def test_emphasis(self) -> None:
        """`<em>` tags are expected to be kept while a lone `&` is escaped."""
        original = "Text before [<em>emphasis</em>] Text after illegal amp: &"
        escaped = "Text before [<em>emphasis</em>] Text after illegal amp: &amp;"
        assert _escape_reserved_chars(original) == escaped

    def test_link(self) -> None:
        """An `<a>` tag carrying attributes is expected to come back unchanged."""
        original = 'Before <a class="salsah-link" href="IRI:link:IRI">link</a> after'
        assert _escape_reserved_chars(original) == original

    def test_illegal_angular(self) -> None:
        """Angle brackets of an unknown tag are expected to be escaped on both the opening and closing tag."""
        original = "Before <TagNotKnown>in tags</TagNotKnown> After."
        escaped = "Before &lt;TagNotKnown&gt;in tags&lt;/TagNotKnown&gt; After."
        assert _escape_reserved_chars(original) == escaped


def _strip_namespace(element: etree._Element) -> str:
"""Removes the namespace from the XML element."""
xml = etree.tostring(element, encoding="unicode")
Expand Down

0 comments on commit c73b126

Please sign in to comment.