Skip to content

Commit

Permalink
Fixed CDATA parsing issues. bug 680248
Browse files: browse the repository at this point in the history
  • Loading branch information
mattbasta committed Sep 11, 2011
1 parent daeb665 commit 717c07f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 15 deletions.
19 changes: 19 additions & 0 deletions tests/test_markup_markuptester.py
Expand Up @@ -96,6 +96,25 @@ def test_cdata_properly():
</script>
</foo>""", "foo.xul", should_fail=False)

# Test that there are no problems if the CDATA element starts or ends on
# the same line as the parent tag.
err = _do_test_raw("""<foo>
<script><![CDATA[
<button><p><span><foo>
</bar></zap>
<selfclosing />
<><><><""><!><
]]></script>
</foo>""", "foo.xul", should_fail=False)

# Test that there are no problems if multiple CDATA elements open and
# close on the same line.
err = _do_test_raw("""<foo>
<foo><![CDATA[</bar></foo>]]></foo><![CDATA[
<![CDATA[ <-- Should be ignored since we're buffering.</bar><zap>
]]>
</foo>""", "foo.xul", should_fail=False)

err = _do_test_raw("""<foo>
<![CDATA[
<button><p><span><foo>
Expand Down
43 changes: 28 additions & 15 deletions validator/testcases/markup/markuptester.py
Expand Up @@ -68,33 +68,46 @@ def process(self, filename, data, extension="xul"):
self.context = ContextGenerator(data)

lines = data.split("\n")
force_buffer = False

buffering = False
pline = 0
for line in lines:
self.line += 1

search_line = line
while True:
# CDATA elements are gross. Pass the whole entity as one chunk
if "<![CDATA[" in search_line and not force_buffer:
# If a CDATA element is found, push it and its contents to the
# buffer. Push everything previous to it to the parser.
if "<![CDATA[" in search_line and not buffering:
# Find the CDATA element.
cdatapos = search_line.find("<![CDATA[")
post_cdata = search_line[cdatapos:]

if "]]>" in post_cdata:
search_line = post_cdata[post_cdata.find("]]>") + 3:]
continue
force_buffer = True
elif "]]>" in search_line and force_buffer:
force_buffer = False

# If the element isn't at the start of the line, pass
# everything before it to the parser.
if cdatapos:
self._feed_parser(search_line[:cdatapos])
# Collect the rest of the line to send it to the buffer.
search_line = search_line[cdatapos:]
buffering = True
continue

elif "]]>" in search_line and buffering:
# If we find the end element on the line being scanned,
# buffer everything up to the end of it, and let the rest
# of the line pass through for further processing.
end_cdatapos = search_line.find("]]>") + 3
self._save_to_buffer(search_line[:end_cdatapos])
search_line = search_line[end_cdatapos:]
buffering = False
break

if force_buffer:
self._save_to_buffer(line + "\n")
if buffering:
self._save_to_buffer(search_line + "\n")
else:
self._feed_parser(line)
self._feed_parser(search_line)

def _feed_parser(self, line):
"Feeds data into the parser"
"""Feed incoming data into the underlying HTMLParser."""

line = unicodehelper.decode(line)

Expand Down

0 comments on commit 717c07f

Please sign in to comment.