Skip to content

Commit

Permalink
Merge branch 'flavorjones-remediate-attribute-escaping'
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Mar 19, 2018
2 parents 0c97c74 + f739cf8 commit 332ec6a
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 3 deletions.
1 change: 1 addition & 0 deletions lib/loofah.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
require 'loofah/elements'

require 'loofah/html5/whitelist'
require 'loofah/html5/libxml2_workarounds'
require 'loofah/html5/scrub'

require 'loofah/scrubber'
Expand Down
26 changes: 26 additions & 0 deletions lib/loofah/html5/libxml2_workarounds.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# coding: utf-8
require 'set'

module Loofah
#
# constants related to working around unhelpful libxml2 behavior
#
# ಠ_ಠ
#
module LibxmlWorkarounds
#
# these attributes and qualifying parent tags are determined by the code at:
#
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
# see comments about CVE-2018-8048 within the tests for more information
#
BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
href
action
src
name
]
BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
end
end
33 changes: 31 additions & 2 deletions lib/loofah/html5/scrub.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#encoding: US-ASCII

require 'cgi'
require 'crass'

Expand Down Expand Up @@ -65,6 +63,8 @@ def scrub_attributes node
node.attribute_nodes.each do |attr_node|
node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
end

force_correct_attribute_escaping! node
end

def scrub_css_attribute node
Expand Down Expand Up @@ -100,6 +100,35 @@ def scrub_css style

Crass::Parser.stringify sanitized_tree
end

private

#
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
#
# see comments about CVE-2018-8048 within the tests for more information
#
def force_correct_attribute_escaping! node
return unless Nokogiri::VersionInfo.instance.libxml2?

node.attribute_nodes.each do |attr_node|
next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name)

tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name]
next unless tag_name.nil? || tag_name == node.name

#
# this block is just like CGI.escape in Ruby 2.4, but
# only encodes space and double-quote, to mimic
# pre-2.9.2 behavior
#
encoding = attr_node.value.encoding
attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
end.force_encoding(encoding)
end
end

end
end
end
Expand Down
67 changes: 66 additions & 1 deletion test/integration/test_ad_hoc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,71 @@ def test_dont_remove_whitespace_between_tags
html = "<p>Foo</p>\n<p>Bar</p>"
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
end

#
# tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
#
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
# wants to ensure these comments can be treated as "server-side includes",
# but as a result fails to ensure that serialization is well-formed,
# resulting in an opportunity for XSS injection of code into a final
# re-parsed document (presumably in a browser).
#
# we'll test this by parsing the HTML, serializing it, then
# re-parsing it to ensure there isn't any ambiguity in the output
# that might allow code injection into a browser consuming
# "sanitized" output.
#
[
#
# these tags and attributes are determined by the code at:
#
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
{tag: "a", attr: "href"},
{tag: "div", attr: "href"},
{tag: "a", attr: "action"},
{tag: "div", attr: "action"},
{tag: "a", attr: "src"},
{tag: "div", attr: "src"},
{tag: "a", attr: "name"},
#
# note that div+name is _not_ affected by the libxml2 issue.
# but we test it anyway to ensure our logic isn't modifying
# attributes that don't need modifying.
#
{tag: "div", attr: "name", unescaped: true},
].each do |config|

define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}

reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
attributes = reparsed.at_css(config[:tag]).attribute_nodes

assert_equal [config[:attr]], attributes.collect(&:name)
if Nokogiri::VersionInfo.new.libxml2?
if config[:unescaped]
#
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
# assert that this attribute's serialization is unaffected.
#
assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
else
#
# let's match the behavior in libxml < 2.9.2.
# test that this attribute's serialization is well-formed and sanitized.
#
assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
end
else
#
# yay for consistency in javaland. move along, nothing to see here.
#
assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
end
end

end
end
end

0 comments on commit 332ec6a

Please sign in to comment.