Permalink
Browse files

sanitize_document is here. willkommen, bienvenue.

  • Loading branch information...
1 parent dcc6ab3 commit 80a1ecf5bf4c0fe1ab541a9e252a595e6a78eade @flavorjones flavorjones committed Feb 10, 2009
Showing with 20 additions and 0 deletions.
  1. +13 −0 lib/dryopteris/sanitize.rb
  2. +7 −0 test/test_sanitizer.rb
View
13 lib/dryopteris/sanitize.rb
@@ -32,6 +32,19 @@ def sanitize(string, encoding=nil)
body.children.map { |x| x.to_xml }.join
end
+ def sanitize_document(string_or_io, encoding=nil)
+ return nil if string_or_io.nil?
+ return "" if string_or_io.strip.size == 0
+
+ doc = Nokogiri::HTML.parse(string_or_io, nil, encoding)
+ elements = doc.xpath("/html/head/*","/html/body/*")
+ return "" if (elements.nil? || elements.empty?)
+ elements.each do |node|
+ traverse_conditionally_top_down(node, :sanitize_node)
+ end
+ doc.root.to_xml
+ end
+
private
def traverse_conditionally_top_down(node, method_name)
return if send(method_name, node)
View
7 test/test_sanitizer.rb
@@ -7,9 +7,16 @@ def sanitize_html stream
Dryopteris.sanitize(stream)
end
+ def sanitize_doc stream
+ Dryopteris.sanitize_document(stream)
+ end
+
# Shared assertion helper for the sanitizer tests.
#
# Checks two things for +input+, after swapping double quotes for single
# quotes in the sanitizer output:
#   1. sanitize_html(input) equals +htmloutput+ exactly.
#   2. sanitize_doc(input) contains +htmloutput+ somewhere in its output.
#
# xhtmloutput and rexmloutput are accepted but not used in this method.
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
# libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
assert_equal htmloutput, sanitize_html(input).gsub(/"/,"'"), input
+
+ doc = sanitize_doc(input).gsub(/"/,"'")
+ assert doc.include?(htmloutput), "#{input}:\n#{doc}\nshould include:\n#{htmloutput}"
end
WhiteList::ALLOWED_ELEMENTS.each do |tag_name|

0 comments on commit 80a1ecf

Please sign in to comment.