Permalink
Browse files

split whitewashing into fragment and document versions

  • Loading branch information...
flavorjones committed Apr 28, 2009
1 parent b8093f7 commit b380843e000233a5f25e65da90264df6e9fe4a48
Showing with 24 additions and 2 deletions.
  1. +17 −1 lib/dryopteris/sanitize.rb
  2. +7 −1 test/test_basic.rb
View
@@ -18,7 +18,22 @@ def strip_tags(string_or_io, encoding=nil)
body_element.inner_text
end
- def whitewash(string_or_io, encoding=nil)
+
# Fragment version of whitewashing: takes a String of markup rather than a
# whole document.
#
# string   - markup to clean; nil yields nil, blank (after strip) yields "".
# encoding - passed through as the third argument to Nokogiri::HTML.parse.
#
# Returns the children of the parsed <body>, each serialized with to_xml and
# joined into a single String, or "" when no /html/body node is found.
+ def whitewash(string, encoding=nil)
+ return nil if string.nil?
+ return "" if string.strip.size == 0
+
# Wrap the input in an explicit html/body shell before parsing, so the
# caller's markup ends up as the children of /html/body.
+ string = "<html><body>" + string + "</body></html>"
+ doc = Nokogiri::HTML.parse(string, nil, encoding)
+ body = doc.xpath("/html/body").first
+ return "" if body.nil?
# Apply :whitewash_node to each top-level child via the traversal helper.
# NOTE(review): traverse_conditionally_top_down and whitewash_node are defined
# outside this hunk; their exact effect on the tree is not visible here.
+ body.children.each do |node|
+ traverse_conditionally_top_down(node, :whitewash_node)
+ end
# Serialize only the body's children, so the html/body wrapper added above is
# not part of the result.
+ body.children.map { |x| x.to_xml }.join
+ end
+
+ def whitewash_document(string_or_io, encoding=nil)
return nil if string_or_io.nil?
return "" if string_or_io.strip.size == 0
@@ -31,6 +46,7 @@ def whitewash(string_or_io, encoding=nil)
body.children.map { |x| x.to_xml }.join
end
+
def sanitize(string, encoding=nil)
return nil if string.nil?
return "" if string.strip.size == 0
View
@@ -73,6 +73,12 @@ def test_fragment_with_text_nodes_leading_and_trailing
assert_equal "text<p>fragment</p>text", Dryopteris.sanitize("text<p>fragment</p>text")
end
# Feeds markup made of bare text, a frameset/frame pair and a <b> element
# through whitewashing and checks the exact cleaned output.
# NOTE(review): the test name says "fragment", but the call below is to
# Dryopteris.whitewash_document, not the new fragment-oriented
# Dryopteris.whitewash — confirm which entry point this test is meant to cover.
+ def test_whitewash_on_fragment
+ html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
+ whitewashed = Dryopteris.whitewash_document(html)
# Expected: the frameset/frame markup is gone, the leading text is wrapped in
# <p>, and the <b> element is kept.
+ assert_equal "<p>safe</p><b>description</b>", whitewashed
+ end
+
def test_whitewash_on_microsofty_markup
html = <<-EOHTML
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
@@ -140,7 +146,7 @@ def test_whitewash_on_microsofty_markup
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
EOHTML
- whitewashed = Dryopteris.whitewash(html)
+ whitewashed = Dryopteris.whitewash_document(html)
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
end

0 comments on commit b380843

Please sign in to comment.