From aba01a1d1fd495f1acbe0f04d64bc64123743549 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 3 Apr 2012 07:31:38 -0400 Subject: [PATCH 1/4] Test to reproduce issue #643. --- test/xml/test_document_fragment.rb | 35 +++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb index d7c6dfa27a..6abe4e86b3 100644 --- a/test/xml/test_document_fragment.rb +++ b/test/xml/test_document_fragment.rb @@ -176,15 +176,34 @@ def awesome! assert fragment.children.respond_to?(:awesome!), fragment.children.class end - def test_for_libxml_in_context_fragment_parsing_bug_workaround - 10.times do - begin - fragment = Nokogiri::XML.fragment("
") - parent = fragment.children.first - child = parent.parse("

").first - parent.add_child child + if Nokogiri.uses_libxml? + def test_for_libxml_in_context_fragment_parsing_bug_workaround + 10.times do + begin + fragment = Nokogiri::XML.fragment("
") + parent = fragment.children.first + child = parent.parse("

").first + parent.add_child child + end + GC.start end - GC.start + end + + def test_for_libxml_in_context_memory_badness_when_encountering_encoding_errors + # see issue #643 for background + # this test exists solely to raise an error during valgrind test runs. + html = <<-EOHTML + + + + + +
Foo
+ + +EOHTML + doc = Nokogiri::HTML html + doc.at_css("div").replace("Bar") end end end From bda2bcb6c83fc10e27c0742e1222ce653620359b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 27 Apr 2012 08:17:00 -0400 Subject: [PATCH 2/4] Fixing segfault related to unsupported encodings in in-context parsing on 1.8.7. Closes #643. --- ext/nokogiri/xml_node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index c8b7e3bc9d..6bc704f830 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1219,7 +1219,7 @@ static VALUE process_xincludes(VALUE self, VALUE options) /* TODO: DOCUMENT ME */ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) { - xmlNodePtr node, list, child_iter, tmp, node_children, doc_children; + xmlNodePtr node, list = 0, child_iter, tmp, node_children, doc_children; xmlNodeSetPtr set; xmlParserErrors error; VALUE doc, err; From 97ca5628ba7d556b51159abd5fca1328d9f7e87b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 27 Apr 2012 08:19:14 -0400 Subject: [PATCH 3/4] Minor cleanup in xml_node.c `in_context`. --- ext/nokogiri/xml_node.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 6bc704f830..90ffcf0c15 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1219,7 +1219,7 @@ static VALUE process_xincludes(VALUE self, VALUE options) /* TODO: DOCUMENT ME */ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) { - xmlNodePtr node, list = 0, child_iter, tmp, node_children, doc_children; + xmlNodePtr node, list = 0, child_iter, node_children, doc_children; xmlNodeSetPtr set; xmlParserErrors error; VALUE doc, err; @@ -1257,8 +1257,8 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) * is because if there were errors, it's possible for the child pointers * to be manipulated. */ if (error != XML_ERR_OK) { - node->doc->children = doc_children; - node->children = node_children; + node->doc->children = doc_children; + node->children = node_children; } /* make sure parent/child pointers are coherent so an unlink will work @@ -1266,9 +1266,9 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) */ child_iter = node->doc->children ; while (child_iter) { - if (child_iter->parent != (xmlNodePtr)node->doc) - child_iter->parent = (xmlNodePtr)node->doc; - child_iter = child_iter->next; + if (child_iter->parent != (xmlNodePtr)node->doc) + child_iter->parent = (xmlNodePtr)node->doc; + child_iter = child_iter->next; } #ifndef HTML_PARSE_NOIMPLIED @@ -1285,12 +1285,12 @@ static VALUE in_context(VALUE self, VALUE _str, VALUE _options) * https://bugzilla.gnome.org/show_bug.cgi?id=668155 */ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) { - tmp = node; - while (tmp->parent) - tmp = tmp->parent; + child_iter = node; + while (child_iter->parent) + child_iter = child_iter->parent; - if (tmp->type == XML_DOCUMENT_FRAG_NODE) - node->doc->children = NULL; + if (child_iter->type == XML_DOCUMENT_FRAG_NODE) + node->doc->children = NULL; } /* FIXME: This probably needs to handle more constants... */ From cf7fd3aef7745c5cece68e5108fdd65eb53f4eb8 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 27 Apr 2012 08:19:32 -0400 Subject: [PATCH 4/4] The elephant in the room: deprecate support for 1.8.7? --- ROADMAP.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index bd3e097fac..df514721f1 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -62,9 +62,9 @@ ## Encoding -We have a lot of issues open around encoding. Is this really an issue? -Somebody who knows something about encoding, and cares, should point -this one. +We have a lot of issues open around encoding. How bad are things? +Would it help if we deprecated support for Ruby 1.8.7? Somebody who +knows encoding well should head this up. * Extract EncodingReader as a real object that can be injected https://groups.google.com/forum/#!msg/nokogiri-talk/arJeAtMqvkg/tGihB-iBRSAJ