Skip to content

Commit

Permalink
pulling in html5 sanitizer tests
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Dec 4, 2008
1 parent 65acff8 commit f2f88fc
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 3 deletions.
2 changes: 2 additions & 0 deletions test/helper.rb
@@ -0,0 +1,2 @@
require 'test/unit'
require 'dryopteris'
4 changes: 1 addition & 3 deletions test/test_basic.rb
@@ -1,6 +1,4 @@

require 'test/unit'
require 'dryopteris'
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))

class TestBasic < Test::Unit::TestCase

Expand Down
116 changes: 116 additions & 0 deletions test/test_sanitizer.rb
@@ -0,0 +1,116 @@
require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))

# Sanitizer regression tests. Most test methods are generated while the class
# body is evaluated, one per entry of the WhiteList constants reached through
# the included Dryopteris module.
class SanitizeTest < Test::Unit::TestCase
  include Dryopteris

  # Runs +stream+ through Dryopteris.sanitize and returns the result.
  def sanitize_html(stream)
    Dryopteris.sanitize(stream)
  end

  # Asserts that sanitizing +input+ produces +htmloutput+.
  #
  # +xhtmloutput+ and +rexmloutput+ are accepted so callers can pass all three
  # expectations, but only the HTML expectation is compared here.
  def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
    assert_equal(htmloutput, sanitize_html(input))
  end

  # Tags for which the expected HTML output is only the escaped inner text.
  text_only_tags = %w[caption colgroup optgroup option tbody td tfoot th thead tr]

  # One test per whitelisted element: the element is expected to survive while
  # the unknown <bad> child is expected to come back entity-escaped.
  WhiteList::ALLOWED_ELEMENTS.each do |tag|
    define_method("test_should_allow_#{tag}_tag") do
      markup = "<#{tag} title='1'>foo <bad>bar</bad> baz</#{tag}>"
      expected_html = "<#{tag.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag.downcase}>"
      expected_xhtml = "<#{tag} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag}>"
      expected_rexml = expected_xhtml

      # Per-tag overrides of the default expectations. Branch order matters:
      # 'col' and 'image' are matched before the generic void-element case.
      case tag
      when *text_only_tags
        expected_html = expected_xhtml = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
      when 'col'
        expected_html = expected_xhtml = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
        expected_rexml = "<col title='1' />"
      when 'table'
        expected_html = expected_xhtml = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
      when 'image'
        expected_html = expected_xhtml = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
        expected_rexml = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
      else
        if VOID_ELEMENTS.include?(tag)
          expected_xhtml = "<#{tag} title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
          # Only the HTML expectation for 'br' carries the extra trailing <br/>.
          expected_html = tag == 'br' ? expected_xhtml + '<br/>' : expected_xhtml
          expected_rexml = "<#{tag} title='1' />"
        end
      end

      check_sanitization(markup, expected_html, expected_xhtml, expected_rexml)
    end
  end

  # One test per whitelisted element spelled in upper case: the whole tag is
  # expected to be escaped, with its attribute re-quoted using double quotes.
  WhiteList::ALLOWED_ELEMENTS.each do |tag|
    shouted = tag.upcase

    define_method("test_should_forbid_#{shouted}_tag") do
      markup = "<#{shouted} title='1'>foo <bad>bar</bad> baz</#{shouted}>"
      expected = "&lt;#{shouted} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{shouted}&gt;"
      check_sanitization(markup, expected, expected, expected)
    end
  end

  # One test per whitelisted attribute (except 'style', which is skipped):
  # the attribute is expected to be kept on the <p> element.
  WhiteList::ALLOWED_ATTRIBUTES.each do |attr|
    next if attr == 'style'

    define_method("test_should_allow_#{attr}_attribute") do
      markup = "<p #{attr}='foo'>foo <bad>bar</bad> baz</p>"
      expected = "<p #{attr}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
      expected_html = "<p #{attr.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
      check_sanitization(markup, expected_html, expected, expected)
    end
  end

  # One test per whitelisted attribute spelled in upper case: the attribute is
  # expected to be dropped, leaving a bare <p>.
  WhiteList::ALLOWED_ATTRIBUTES.each do |attr|
    shouted = attr.upcase

    define_method("test_should_forbid_#{shouted}_attribute") do
      markup = "<p #{shouted}='display: none;'>foo <bad>bar</bad> baz</p>"
      expected = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
      check_sanitization(markup, expected, expected, expected)
    end
  end

  # One test per whitelisted protocol: an href consisting of the protocol name
  # is expected to be kept (re-quoted with single quotes).
  WhiteList::ALLOWED_PROTOCOLS.each do |scheme|
    define_method("test_should_allow_#{scheme}_uris") do
      markup = %(<a href="#{scheme}">foo</a>)
      expected = "<a href='#{scheme}'>foo</a>"
      check_sanitization(markup, expected, expected, expected)
    end
  end

  # Same as above, with the protocol name upper-cased in the href.
  WhiteList::ALLOWED_PROTOCOLS.each do |scheme|
    define_method("test_should_allow_uppercase_#{scheme}_uris") do
      shouted = scheme.upcase
      markup = %(<a href="#{shouted}">foo</a>)
      expected = "<a href='#{shouted}'>foo</a>"
      check_sanitization(markup, expected, expected, expected)
    end
  end

  # Characters outside the Basic Multilingual Plane: numeric character
  # references are expected to come back as raw UTF-8 bytes, and raw UTF-8
  # bytes are expected to pass through unchanged.
  def test_should_handle_astral_plane_characters
    [
      ["<p>&#x1d4b5; &#x1d538;</p>", "<p>\360\235\222\265 \360\235\224\270</p>"],
      ["<p><tspan>\360\235\224\270</tspan> a</p>", "<p><tspan>\360\235\224\270</tspan> a</p>"]
    ].each do |markup, expected|
      check_sanitization(markup, expected, expected, expected)
    end
  end

  # This affects only NS4. Is it worth fixing?
  # def test_javascript_includes
  #   input = %(<div size="&{alert('XSS')}">foo</div>)
  #   output = "<div>foo</div>"
  #   check_sanitization(input, output, output, output)
  # end

  #html5_test_files('sanitizer').each do |filename|
  #  JSON::parse(open(filename).read).each do |test|
  #    define_method "test_#{test['name']}" do
  #      check_sanitization(
  #        test['input'],
  #        test['output'],
  #        test['xhtml'] || test['output'],
  #        test['rexml'] || test['output']
  #      )
  #    end
  #  end
  #end
end

0 comments on commit f2f88fc

Please sign in to comment.