Skip to content

Commit

Permalink
Replace CSS regexes with Crass.
Browse files Browse the repository at this point in the history
Related to #90 and #91.
  • Loading branch information
flavorjones committed Aug 17, 2015
1 parent f388282 commit ca56295
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 27 deletions.
3 changes: 2 additions & 1 deletion Gemfile
Expand Up @@ -5,6 +5,7 @@
source "https://rubygems.org/"

gem "nokogiri", ">=1.5.9"
gem "crass", "~>1.0.2"

gem "rdoc", "~>4.0", :group => [:development, :test]
gem "rake", ">=0.8", :group => [:development, :test]
Expand All @@ -15,6 +16,6 @@ gem "hoe-gemspec", ">=0", :group => [:development, :test]
gem "hoe-debugging", ">=0", :group => [:development, :test]
gem "hoe-bundler", ">=0", :group => [:development, :test]
gem "hoe-git", ">=0", :group => [:development, :test]
gem "hoe", "~>3.6", :group => [:development, :test]
gem "hoe", "~>3.13", :group => [:development, :test]

# vim: syntax=ruby
1 change: 1 addition & 0 deletions Rakefile
Expand Up @@ -17,6 +17,7 @@ Hoe.spec "loofah" do
self.license "MIT"

extra_deps << ["nokogiri", ">=1.5.9"]
extra_deps << ["crass", "~> 1.0.2"]

extra_dev_deps << ["rake", ">=0.8"]
extra_dev_deps << ["minitest", "~>2.2"]
Expand Down
46 changes: 24 additions & 22 deletions lib/loofah/html5/scrub.rb
@@ -1,12 +1,15 @@
#encoding: US-ASCII

require 'cgi'
require 'crass'

module Loofah
module HTML5 # :nodoc:
module Scrub

CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}

class << self

Expand Down Expand Up @@ -61,36 +64,35 @@ def scrub_css_attribute node
style.value = scrub_css(style.value) if style
end

# lifted nearly verbatim from html5lib
def scrub_css style
# disallow urls
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
style_tree = Crass.parse_properties style
sanitized_tree = []

# gauntlet
return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
return '' unless style =~ /\A\s*([-\w]+\s*:[^:;]*(;\s*|$))*\z/

clean = []
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
next if val.empty?
prop.downcase!
if WhiteList::ALLOWED_CSS_PROPERTIES.include?(prop)
clean << "#{prop}: #{val};"
elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(prop.split('-')[0])
clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
!WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) &&
keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
style_tree.each do |node|
next unless node[:node] == :property
next if node[:children].any? do |child|
[:url, :bad_url, :function].include? child[:node]
end
name = node[:name].downcase
if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
sanitized_tree << node << CRASS_SEMICOLON
elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
value = node[:value].split.map do |keyword|
if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
keyword
end
end.compact
unless value.empty?
propstring = sprintf "%s:%s", name, value.join(" ")
sanitized_node = Crass.parse_properties(propstring).first
sanitized_tree << sanitized_node << CRASS_SEMICOLON
end
elsif WhiteList::ALLOWED_SVG_PROPERTIES.include?(prop)
clean << "#{prop}: #{val};"
end
end

style = clean.join(' ')
Crass::Parser.stringify sanitized_tree
end

end

end
end
end
2 changes: 1 addition & 1 deletion test/assets/testdata_sanitizer_tests1.dat
Expand Up @@ -152,7 +152,7 @@
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
"output": "<a href='http://www.ragingplatypus.com/' style='display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;'>never trust your upstream platypus</a>"
},

{
Expand Down
27 changes: 25 additions & 2 deletions test/html5/test_sanitizer.rb
Expand Up @@ -229,14 +229,12 @@ def test_figure_element_is_valid
end

def test_css_negative_value_sanitization
skip "pending better CSS parsing, see https://github.com/flavorjones/loofah/issues/90"
html = "<span style=\"letter-spacing:-0.03em;\">"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
assert_match %r/-0.03em/, sane.inner_html
end

def test_css_negative_value_sanitization_shorthand_css_properties
skip "pending better CSS parsing, see https://github.com/flavorjones/loofah/issues/90"
html = "<span style=\"margin-left:-0.05em;\">"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
assert_match %r/-0.05em/, sane.inner_html
Expand All @@ -249,6 +247,31 @@ def test_issue_90_slow_regex
Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
}
end

def test_upper_case_css_property
html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
assert_match /COLOR:\s*BLUE/i, sane.at_css("div")["style"]
refute_match /NOTAPROPERTY/i, sane.at_css("div")["style"]
end

def test_many_properties_some_allowed
html = "<div style=\"background: bold notaproperty center alsonotaproperty 10px;\">asdf</div>"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
assert_match /bold\s+center\s+10px/, sane.at_css("div")["style"]
end

def test_many_properties_non_allowed
html = "<div style=\"background: notaproperty alsonotaproperty;\">asdf</div>"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
assert_nil sane.at_css("div")["style"]
end

def test_svg_properties
html = "<line style='stroke-width: 10px;'></line>"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
assert_match /stroke-width:\s*10px/, sane.at_css("line")["style"]
end
end

# <html5_license>
Expand Down
2 changes: 1 addition & 1 deletion test/integration/test_helpers.rb
Expand Up @@ -37,7 +37,7 @@ class IntegrationTestHelpers < Loofah::TestCase

context ".sanitize_css" do
it "removes unsafe css properties" do
assert_equal "display: block; background-color: blue;", Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue")
assert_match /display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue")
end
end
end

0 comments on commit ca56295

Please sign in to comment.