Skip to content

Commit

Permalink
EscapeUtils requires input that's UTF-8 or US-ASCII (1.9 only)
Browse files Browse the repository at this point in the history
Since we're using houdini for all escaping now, we need to ensure
that the input text is UTF-8 compatible, so that we don't risk
corrupting the input. If the input isn't tagged as UTF-8 or US-ASCII, an
`Encoding::CompatibilityError` exception is raised.
  • Loading branch information
brianmario committed Nov 10, 2011
1 parent dccfd44 commit 13062d3
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 73 deletions.
46 changes: 34 additions & 12 deletions ext/escape_utils/escape_utils.c
Expand Up @@ -9,7 +9,26 @@
#endif
#ifdef HAVE_RUBY_ENCODING_H
#include <ruby/encoding.h>
/* Holds Encoding::CompatibilityError; assigned in Init_escape_utils. */
static VALUE rb_eEncodingCompatibilityError;
/* Build a Ruby String from `len` bytes at `str`, tagged as UTF-8. */
static VALUE eu_new_str(const char *str, size_t len) {
return rb_enc_str_new(str, len, rb_utf8_encoding());
}
#else
/* Without ruby/encoding.h: build a plain Ruby String from `len` bytes at `str`. */
static VALUE eu_new_str(const char *str, size_t len) {
return rb_str_new(str, len);
}
#endif

/*
 * Reject strings whose encoding tag is neither UTF-8 nor US-ASCII.
 *
 * Raises the error class stored in rb_eEncodingCompatibilityError, naming
 * the offending encoding in the message. When HAVE_RUBY_ENCODING_H is not
 * defined the function body is empty and nothing is checked.
 */
static void check_utf8_encoding(VALUE str) {
#ifdef HAVE_RUBY_ENCODING_H
	rb_encoding *actual = rb_enc_get(str);

	/* Accepted encodings: return early without raising. */
	if (actual == rb_utf8_encoding() || actual == rb_usascii_encoding())
		return;

	rb_raise(rb_eEncodingCompatibilityError, "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(actual));
#endif
}

#include "houdini.h"

Expand All @@ -35,7 +54,6 @@ static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
return val;
}


/**
* Generic template
*/
Expand All @@ -46,19 +64,18 @@ rb_eu__generic(VALUE str, houdini_cb callback, size_t chunk_size)
struct buf *out_buf;

if (NIL_P(str))
return rb_str_new2("");
return eu_new_str("", 0);

Check_Type(str, T_STRING);

check_utf8_encoding(str);

out_buf = bufnew(chunk_size);

callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
result = rb_str_new((char *)out_buf->data, out_buf->size);
result = eu_new_str((const char *)out_buf->data, out_buf->size);
bufrelease(out_buf);

#ifdef HAVE_RUBY_ENCODING_H
rb_enc_copy(result, str);
#endif

return result;
}

Expand All @@ -79,17 +96,16 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
}

Check_Type(str, T_STRING);

check_utf8_encoding(str);

out_buf = bufnew(128);

houdini_escape_html0(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);

rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
rb_out_buf = eu_new_str((const char *)out_buf->data, out_buf->size);
bufrelease(out_buf);

#ifdef HAVE_RUBY_ENCODING_H
rb_enc_copy(rb_out_buf, str);
#endif

return rb_out_buf;
}

Expand Down Expand Up @@ -156,6 +172,12 @@ static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
void Init_escape_utils()
{
rb_mEscapeUtils = rb_define_module("EscapeUtils");

#ifdef HAVE_RUBY_ENCODING_H
VALUE rb_cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
#endif

rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
Expand Down
21 changes: 17 additions & 4 deletions spec/html/escape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "escape_html" do
Expand Down Expand Up @@ -32,11 +33,23 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "<b>Bourbon & Branch</b>".encode('us-ascii')
EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('us-ascii'))
# escape_html must reject input whose encoding tag is neither UTF-8 nor
# US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  str = "<b>Bourbon & Branch</b>"

  # force_encoding only retags the bytes; it does not transcode them.
  str.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.escape_html(str)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  str.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.escape_html(str)
  }.should_not raise_error
end

# Checks that escape_html returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  str = "<b>Bourbon & Branch</b>".encode('utf-8')
  # One assertion is enough: 'utf-8' and 'UTF-8' resolve to the same Encoding.
  EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
24 changes: 19 additions & 5 deletions spec/html/unescape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "unescape_html" do
Expand Down Expand Up @@ -27,11 +28,24 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('us-ascii')
EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('us-ascii'))
str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('utf-8')
EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('utf-8'))
# unescape_html must reject input whose encoding tag is neither UTF-8 nor
# US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  escaped = EscapeUtils.escape_html("<b>Bourbon & Branch</b>")

  # force_encoding only retags the bytes; it does not transcode them.
  escaped.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.unescape_html(escaped)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  escaped.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.unescape_html(escaped)
  }.should_not raise_error
end

# Checks that unescape_html returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  utf8 = Encoding.find('UTF-8')
  unescaped = EscapeUtils.unescape_html(EscapeUtils.escape_html("<b>Bourbon & Branch</b>"))

  unescaped.encoding.should eql(utf8)
end
end
end
5 changes: 3 additions & 2 deletions spec/html_safety_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/spec_helper.rb')

class Object
Expand Down Expand Up @@ -26,10 +27,10 @@ def html_safe
end
end

include EscapeUtils::HtmlSafety

describe EscapeUtils::HtmlSafety do

include EscapeUtils::HtmlSafety

it "should escape unsafe strings and make them safe" do
escaped = _escape_html("<strong>unsafe</strong>")
escaped.should eql("&lt;strong&gt;unsafe&lt;&#47;strong&gt;")
Expand Down
23 changes: 18 additions & 5 deletions spec/javascript/escape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "escape_javascript" do
Expand All @@ -23,11 +24,23 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "dont </close> tags".encode('us-ascii')
EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
str = "dont </close> tags".encode('utf-8')
EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
# escape_javascript must reject input whose encoding tag is neither UTF-8
# nor US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  str = "dont </close> tags"

  # force_encoding only retags the bytes; it does not transcode them.
  str.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.escape_javascript(str)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  str.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.escape_javascript(str)
  }.should_not raise_error
end

# Checks that escape_javascript returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  escaped = EscapeUtils.escape_javascript("dont </close> tags")

  escaped.encoding.should eql(Encoding.find('UTF-8'))
end
end
end
Expand Down
24 changes: 19 additions & 5 deletions spec/javascript/unescape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "unescape_javascript" do
Expand Down Expand Up @@ -27,11 +28,24 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "dont <\\/close> tags".encode('us-ascii')
EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
str = "dont <\\/close> tags".encode('utf-8')
EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
# unescape_javascript must reject input whose encoding tag is neither UTF-8
# nor US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  escaped = EscapeUtils.escape_javascript("dont </close> tags")

  # force_encoding only retags the bytes; it does not transcode them.
  escaped.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.unescape_javascript(escaped)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  escaped.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.unescape_javascript(escaped)
  }.should_not raise_error
end

# Checks that unescape_javascript returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  utf8 = Encoding.find('UTF-8')
  unescaped = EscapeUtils.unescape_javascript(EscapeUtils.escape_javascript("dont </close> tags"))

  unescaped.encoding.should eql(utf8)
end
end
end
23 changes: 18 additions & 5 deletions spec/query/escape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "escape_url" do
Expand Down Expand Up @@ -34,11 +35,23 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "http://www.homerun.com/".encode('us-ascii')
EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii'))
str = "http://www.homerun.com/".encode('utf-8')
EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8'))
# escape_url must reject input whose encoding tag is neither UTF-8 nor
# US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  str = "a space"

  # force_encoding only retags the bytes; it does not transcode them.
  str.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.escape_url(str)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  str.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.escape_url(str)
  }.should_not raise_error
end

# Checks that escape_url returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  escaped = EscapeUtils.escape_url("a+space")

  escaped.encoding.should eql(Encoding.find('UTF-8'))
end
end
end
24 changes: 18 additions & 6 deletions spec/query/unescape_spec.rb
@@ -1,5 +1,4 @@
# encoding: UTF-8

require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')

describe EscapeUtils, "unescape_url" do
Expand Down Expand Up @@ -36,11 +35,24 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii'))
str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8'))
# unescape_url must reject input whose encoding tag is neither UTF-8 nor
# US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  # Build the fixture with escape_url so `escaped` really is escaped input.
  escaped = EscapeUtils.escape_url("a space")

  # force_encoding only retags the bytes; it does not transcode them.
  escaped.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.unescape_url(escaped)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  escaped.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.unescape_url(escaped)
  }.should_not raise_error
end

# Checks that unescape_url returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  utf8 = Encoding.find('UTF-8')
  unescaped = EscapeUtils.unescape_url(EscapeUtils.escape_url("a space"))

  unescaped.encoding.should eql(utf8)
end
end
end
25 changes: 19 additions & 6 deletions spec/uri/escape_spec.rb
@@ -1,3 +1,4 @@
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
require 'uri'

Expand All @@ -7,7 +8,7 @@
end

it "should escape each byte exactly like URI.escape" do
(0..255).each do |i|
(0..127).each do |i|
c = i.chr
EscapeUtils.escape_uri(c).should eql(URI.escape(c))
end
Expand All @@ -33,11 +34,23 @@
end

if RUBY_VERSION =~ /^1.9/
it "return value should be in original string's encoding" do
str = "http://www.homerun.com/".encode('us-ascii')
EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('us-ascii'))
str = "http://www.homerun.com/".encode('utf-8')
EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('utf-8'))
# escape_uri must reject input whose encoding tag is neither UTF-8 nor
# US-ASCII, and accept the same bytes once they are tagged UTF-8.
it "input must be UTF-8 or US-ASCII" do
  str = "fo<o>bar"

  # force_encoding only retags the bytes; it does not transcode them.
  str.force_encoding 'ISO-8859-1'
  lambda {
    EscapeUtils.escape_uri(str)
  }.should raise_error(Encoding::CompatibilityError)

  # Bare should_not raise_error fails on *any* exception. The class-specific
  # negative form would pass if a different error were raised.
  str.force_encoding 'UTF-8'
  lambda {
    EscapeUtils.escape_uri(str)
  }.should_not raise_error
end

# Checks that escape_uri returns a string tagged as UTF-8.
it "return value should be in UTF-8" do
  escaped = EscapeUtils.escape_uri("fo<o>bar")

  escaped.encoding.should eql(Encoding.find('UTF-8'))
end
end
end

0 comments on commit 13062d3

Please sign in to comment.