Fixed CDATA parsing issues (bug 680248).

fox2mike · Sep 11, 2011 · 717c07f · 717c07f
1 parent daeb665
commit 717c07f
Show file tree

Hide file tree

Showing 2 changed files with 47 additions and 15 deletions.
diff --git a/tests/test_markup_markuptester.py b/tests/test_markup_markuptester.py
@@ -96,6 +96,25 @@ def test_cdata_properly():
     </script>
     </foo>""", "foo.xul", should_fail=False)
 
+    # Test that there are no problems if the CDATA element starts or ends on
+    # the same line as the parent tag.
+    err = _do_test_raw("""<foo>
+    <script><![CDATA[
+    <button><p><span><foo>
+    </bar></zap>
+    <selfclosing />
+    <><><><""><!><
+    ]]></script>
+    </foo>""", "foo.xul", should_fail=False)
+
+    # Test that there are no problems if multiple CDATA elements open and
+    # close on the same line.
+    err = _do_test_raw("""<foo>
+    <foo><![CDATA[</bar></foo>]]></foo><![CDATA[
+    <![CDATA[ <-- Should be ignored since we're buffering.</bar><zap>
+    ]]>
+    </foo>""", "foo.xul", should_fail=False)
+
     err = _do_test_raw("""<foo>
     <![CDATA[
     <button><p><span><foo>

diff --git a/validator/testcases/markup/markuptester.py b/validator/testcases/markup/markuptester.py
@@ -68,33 +68,46 @@ def process(self, filename, data, extension="xul"):
         self.context = ContextGenerator(data)
 
         lines = data.split("\n")
-        force_buffer = False
+
+        buffering = False
         pline = 0
         for line in lines:
             self.line += 1
 
             search_line = line
             while True:
-                # CDATA elements are gross. Pass the whole entity as one chunk
-                if "<![CDATA[" in search_line and not force_buffer:
+                # If a CDATA element is found, push it and its contents to the
+                # buffer. Push everything previous to it to the parser.
+                if "<![CDATA[" in search_line and not buffering:
+                    # Find the CDATA element.
                     cdatapos = search_line.find("<![CDATA[")
-                    post_cdata = search_line[cdatapos:]
-
-                    if "]]>" in post_cdata:
-                        search_line = post_cdata[post_cdata.find("]]>") + 3:]
-                        continue
-                    force_buffer = True
-                elif "]]>" in search_line and force_buffer:
-                    force_buffer = False
+
+                    # If the element isn't at the start of the line, pass
+                    # everything before it to the parser.
+                    if cdatapos:
+                        self._feed_parser(search_line[:cdatapos])
+                    # Collect the rest of the line to send it to the buffer.
+                    search_line = search_line[cdatapos:]
+                    buffering = True
+                    continue
+
+                elif "]]>" in search_line and buffering:
+                    # If we find the end element on the line being scanned,
+                    # buffer everything up to the end of it. The rest of the
+                    # line goes to the parser; it is not rescanned for CDATA.
+                    end_cdatapos = search_line.find("]]>") + 3
+                    self._save_to_buffer(search_line[:end_cdatapos])
+                    search_line = search_line[end_cdatapos:]
+                    buffering = False
                 break
 
-            if force_buffer:
-                self._save_to_buffer(line + "\n")
+            if buffering:
+                self._save_to_buffer(search_line + "\n")
             else:
-                self._feed_parser(line)
+                self._feed_parser(search_line)
 
     def _feed_parser(self, line):
-        "Feeds data into the parser"
+        """Feed incoming data into the underlying HTMLParser."""
 
         line = unicodehelper.decode(line)