Permalink
Browse files

EscapeUtils requires input that's UTF-8 or US-ASCII (1.9 only)

Since we're using houdini for all escaping now, we need to ensure
that the input text is UTF-8 compatible, so that we don't risk
corrupting the input. If the input isn't tagged UTF-8 or US-ASCII, an
`Encoding::CompatibilityError` exception is raised.
  • Loading branch information...
1 parent dccfd44 commit 13062d39cf3002a2381e6ffc8403c1b25048de96 @brianmario committed Nov 10, 2011
View
46 ext/escape_utils/escape_utils.c
@@ -9,7 +9,26 @@
#endif
#ifdef HAVE_RUBY_ENCODING_H
#include <ruby/encoding.h>
+static VALUE rb_eEncodingCompatibilityError;
+static VALUE eu_new_str(const char *str, size_t len) {
+ return rb_enc_str_new(str, len, rb_utf8_encoding());
+}
+#else
+static VALUE eu_new_str(const char *str, size_t len) {
+ return rb_str_new(str, len);
+}
+#endif
+
+static void check_utf8_encoding(VALUE str) {
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding *enc;
+
+ enc = rb_enc_get(str);
+ if (enc != rb_utf8_encoding() && enc != rb_usascii_encoding()) {
+ rb_raise(rb_eEncodingCompatibilityError, "Input must be UTF-8 or US-ASCII, %s given", rb_enc_name(enc));
+ }
#endif
+}
#include "houdini.h"
@@ -35,7 +54,6 @@ static VALUE rb_eu_set_html_secure(VALUE self, VALUE val)
return val;
}
-
/**
* Generic template
*/
@@ -46,19 +64,18 @@ rb_eu__generic(VALUE str, houdini_cb callback, size_t chunk_size)
struct buf *out_buf;
if (NIL_P(str))
- return rb_str_new2("");
+ return eu_new_str("", 0);
Check_Type(str, T_STRING);
+
+ check_utf8_encoding(str);
+
out_buf = bufnew(chunk_size);
callback(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str));
- result = rb_str_new((char *)out_buf->data, out_buf->size);
+ result = eu_new_str((const char *)out_buf->data, out_buf->size);
bufrelease(out_buf);
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(result, str);
-#endif
-
return result;
}
@@ -79,17 +96,16 @@ static VALUE rb_eu_escape_html(int argc, VALUE *argv, VALUE self)
}
Check_Type(str, T_STRING);
+
+ check_utf8_encoding(str);
+
out_buf = bufnew(128);
houdini_escape_html0(out_buf, (uint8_t *)RSTRING_PTR(str), RSTRING_LEN(str), secure);
- rb_out_buf = rb_str_new((char *)out_buf->data, out_buf->size);
+ rb_out_buf = eu_new_str((const char *)out_buf->data, out_buf->size);
bufrelease(out_buf);
-#ifdef HAVE_RUBY_ENCODING_H
- rb_enc_copy(rb_out_buf, str);
-#endif
-
return rb_out_buf;
}
@@ -156,6 +172,12 @@ static VALUE rb_eu_unescape_uri(VALUE self, VALUE str)
void Init_escape_utils()
{
rb_mEscapeUtils = rb_define_module("EscapeUtils");
+
+#ifdef HAVE_RUBY_ENCODING_H
+ VALUE rb_cEncoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
+ rb_eEncodingCompatibilityError = rb_const_get(rb_cEncoding, rb_intern("CompatibilityError"));
+#endif
+
rb_define_method(rb_mEscapeUtils, "escape_html", rb_eu_escape_html, -1);
rb_define_method(rb_mEscapeUtils, "unescape_html", rb_eu_unescape_html, 1);
rb_define_method(rb_mEscapeUtils, "escape_xml", rb_eu_escape_xml, 1);
View
21 spec/html/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "escape_html" do
@@ -32,11 +33,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "<b>Bourbon & Branch</b>".encode('us-ascii')
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('us-ascii'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "<b>Bourbon & Branch</b>"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_html(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_html(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
str = "<b>Bourbon & Branch</b>".encode('utf-8')
- EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('utf-8'))
+ EscapeUtils.escape_html(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
24 spec/html/unescape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "unescape_html" do
@@ -27,11 +28,24 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('us-ascii')
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "&lt;b&gt;Bourbon &amp; Branch&lt;/b&gt;".encode('utf-8')
- EscapeUtils.unescape_html(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ escaped = EscapeUtils.escape_html("<b>Bourbon & Branch</b>")
+
+ escaped.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.unescape_html(escaped)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ escaped.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.unescape_html(escaped)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ escaped = EscapeUtils.escape_html("<b>Bourbon & Branch</b>")
+
+ EscapeUtils.unescape_html(escaped).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
5 spec/html_safety_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/spec_helper.rb')
class Object
@@ -26,10 +27,10 @@ def html_safe
end
end
-include EscapeUtils::HtmlSafety
-
describe EscapeUtils::HtmlSafety do
+ include EscapeUtils::HtmlSafety
+
it "should escape unsafe strings and make them safe" do
escaped = _escape_html("<strong>unsafe</strong>")
escaped.should eql("&lt;strong&gt;unsafe&lt;&#47;strong&gt;")
View
23 spec/javascript/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "escape_javascript" do
@@ -23,11 +24,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "dont </close> tags".encode('us-ascii')
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "dont </close> tags".encode('utf-8')
- EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "dont </close> tags"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_javascript(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_javascript(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ str = "dont </close> tags"
+ EscapeUtils.escape_javascript(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
24 spec/javascript/unescape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "unescape_javascript" do
@@ -27,11 +28,24 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "dont <\\/close> tags".encode('us-ascii')
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "dont <\\/close> tags".encode('utf-8')
- EscapeUtils.unescape_javascript(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ escaped = EscapeUtils.escape_javascript("dont </close> tags")
+
+ escaped.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.unescape_javascript(escaped)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ escaped.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.unescape_javascript(escaped)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ escaped = EscapeUtils.escape_javascript("dont </close> tags")
+
+ EscapeUtils.unescape_javascript(escaped).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
23 spec/query/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "escape_url" do
@@ -34,11 +35,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http://www.homerun.com/".encode('us-ascii')
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http://www.homerun.com/".encode('utf-8')
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "a space"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_url(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_url(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ str = "a+space"
+ EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
24 spec/query/unescape_spec.rb
@@ -1,5 +1,4 @@
# encoding: UTF-8
-
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "unescape_url" do
@@ -36,11 +35,24 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ escaped = EscapeUtils.unescape_url("a+space")
+
+ escaped.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.unescape_url(escaped)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ escaped.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.unescape_url(escaped)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ escaped = EscapeUtils.escape_url("a space")
+
+ EscapeUtils.unescape_url(escaped).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
25 spec/uri/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
require 'uri'
@@ -7,7 +8,7 @@
end
it "should escape each byte exactly like URI.escape" do
- (0..255).each do |i|
+ (0..127).each do |i|
c = i.chr
EscapeUtils.escape_uri(c).should eql(URI.escape(c))
end
@@ -33,11 +34,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http://www.homerun.com/".encode('us-ascii')
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http://www.homerun.com/".encode('utf-8')
- EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "fo<o>bar"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_uri(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_uri(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ str = "fo<o>bar"
+ EscapeUtils.escape_uri(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
24 spec/uri/unescape_spec.rb
@@ -1,5 +1,4 @@
# encoding: UTF-8
-
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "unescape_uri" do
@@ -47,11 +46,24 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
- EscapeUtils.unescape_uri(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
- EscapeUtils.unescape_uri(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ escaped = EscapeUtils.escape_uri("fo%3Co%3Ebar")
+
+ escaped.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.unescape_uri(escaped)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ escaped.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.unescape_uri(escaped)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ escaped = EscapeUtils.escape_uri("a space")
+
+ EscapeUtils.unescape_uri(escaped).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
25 spec/url/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
require 'cgi'
@@ -11,7 +12,7 @@
end
it "should escape each possible byte value exactly like CGI.escape" do
- (0..255).each do |i|
+ (0..127).each do |i|
c = i.chr
EscapeUtils.escape_url(c).should eql(CGI.escape(c))
end
@@ -42,11 +43,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http://www.homerun.com/".encode('us-ascii')
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http://www.homerun.com/".encode('utf-8')
- EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "fo<o>bar"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_url(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_url(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ str = "fo<o>bar"
+ EscapeUtils.escape_url(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
24 spec/url/unescape_spec.rb
@@ -1,5 +1,4 @@
# encoding: UTF-8
-
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "unescape_url" do
@@ -47,11 +46,24 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('us-ascii')
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "http%3A%2F%2Fwww.homerun.com%2F".encode('utf-8')
- EscapeUtils.unescape_url(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ escaped = EscapeUtils.escape_url("fo<o>bar")
+
+ escaped.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.unescape_url(escaped)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ escaped.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.unescape_url(escaped)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ escaped = EscapeUtils.escape_url("fo<o>bar")
+
+ EscapeUtils.unescape_url(escaped).encoding.should eql(Encoding.find('UTF-8'))
end
end
end
View
23 spec/xml/escape_spec.rb
@@ -1,3 +1,4 @@
+# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
describe EscapeUtils, "escape_xml" do
@@ -45,11 +46,23 @@
end
if RUBY_VERSION =~ /^1.9/
- it "return value should be in original string's encoding" do
- str = "<b>Bourbon & Branch</b>".encode('us-ascii')
- EscapeUtils.escape_xml(str).encoding.should eql(Encoding.find('us-ascii'))
- str = "<b>Bourbon & Branch</b>".encode('utf-8')
- EscapeUtils.escape_xml(str).encoding.should eql(Encoding.find('utf-8'))
+ it "input must be UTF-8 or US-ASCII" do
+ str = "<some_tag/>"
+
+ str.force_encoding 'ISO-8859-1'
+ lambda {
+ EscapeUtils.escape_xml(str)
+ }.should raise_error(Encoding::CompatibilityError)
+
+ str.force_encoding 'UTF-8'
+ lambda {
+ EscapeUtils.escape_xml(str)
+ }.should_not raise_error(Encoding::CompatibilityError)
+ end
+
+ it "return value should be in UTF-8" do
+ str = "<some_tag/>"
+ EscapeUtils.escape_xml(str).encoding.should eql(Encoding.find('UTF-8'))
end
end
end

0 comments on commit 13062d3

Please sign in to comment.