Skip to content

Commit

Permalink
sanitize_document is here. willkommen, bienvenue.
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Feb 10, 2009
1 parent dcc6ab3 commit 80a1ecf
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
13 changes: 13 additions & 0 deletions lib/dryopteris/sanitize.rb
Expand Up @@ -32,6 +32,19 @@ def sanitize(string, encoding=nil)
body.children.map { |x| x.to_xml }.join
end

# Sanitizes a complete HTML document and returns its serialized root element.
#
# The input is parsed with Nokogiri::HTML.parse, every child element of
# <head> and <body> is walked with traverse_conditionally_top_down using
# :sanitize_node, and the document root is serialized with to_xml.
#
# string_or_io - the HTML source, either a String or an IO-like object
#                (anything Nokogiri::HTML.parse accepts).
# encoding     - optional encoding passed through to the parser.
#
# Returns nil when the input is nil, "" when the input is a blank string or
# the parsed document has no head/body children, and the serialized root
# otherwise.
def sanitize_document(string_or_io, encoding=nil)
  return nil if string_or_io.nil?
  # Only string-like input can be checked for blankness up front; IO-like
  # objects do not respond to #strip and are handed straight to the parser.
  return "" if string_or_io.respond_to?(:strip) && string_or_io.strip.empty?

  doc = Nokogiri::HTML.parse(string_or_io, nil, encoding)
  elements = doc.xpath("/html/head/*", "/html/body/*")
  return "" if elements.nil? || elements.empty?

  elements.each { |node| traverse_conditionally_top_down(node, :sanitize_node) }
  doc.root.to_xml
end

private
def traverse_conditionally_top_down(node, method_name)
return if send(method_name, node)
Expand Down
7 changes: 7 additions & 0 deletions test/test_sanitizer.rb
Expand Up @@ -7,9 +7,16 @@ def sanitize_html stream
Dryopteris.sanitize(stream)
end

# Test helper: passes +stream+ to Dryopteris.sanitize_document and returns
# whatever that call returns.
def sanitize_doc(stream)
  Dryopteris.sanitize_document(stream)
end

# Asserts that +input+ sanitizes to +htmloutput+ as a fragment (via
# sanitize_html) and that the document produced by sanitize_doc contains
# +htmloutput+. The xhtmloutput and rexmloutput arguments are accepted but
# not used by this method.
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
  # libxml serializes with double quotes, so swap them for single quotes
  # before comparing against the expected output.
  fragment = sanitize_html(input).tr('"', "'")
  assert_equal(htmloutput, fragment, input)

  document = sanitize_doc(input).tr('"', "'")
  failure_message = "#{input}:\n#{document}\nshould include:\n#{htmloutput}"
  assert(document.include?(htmloutput), failure_message)
end

WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
Expand Down

0 comments on commit 80a1ecf

Please sign in to comment.