Skip to content

Commit

Permalink
Rename escape_slash in script_safe and also escape E+2028 and E+2029
Browse files Browse the repository at this point in the history
It is rather common to directly interpolate JSON string inside
<script> tags in HTML as to provide configuration or parameters to a
script.

However this may lead to XSS vulnerabilities, to prevent that 3
characters need to be escaped:

  - `/` (forward slash)
  - `U+2028` (LINE SEPARATOR)
  - `U+2029` (PARAGRAPH SEPARATOR)

The forward slash need to be escaped to prevent closing the script
tag early, and the other two are valid JSON but invalid Javascript
and can be used to break JS parsing.

Given that the intent of escaping forward slash is the same than escaping
U+2028 and U+2029, I chos to rename and repurpose the existing `escape_slash`
option.
  • Loading branch information
byroot committed Apr 14, 2023
1 parent 248bc5b commit 637fc52
Show file tree
Hide file tree
Showing 14 changed files with 193 additions and 146 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changes

* `escape_slash` option was renamed as `script_safe` and now also escape U+2028 and U+2029. `escape_slash` is now an alias of `script_safe`.

### 2021-10-24 (2.6.1)

* Restore version.rb with 2.6.1
Expand Down
6 changes: 1 addition & 5 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@ source 'https://rubygems.org'

case ENV['JSON']
when 'ext', nil
if ENV['RUBY_ENGINE'] == 'jruby'
gemspec :name => 'json-java'
else
gemspec :name => 'json'
end
gemspec :name => 'json'
when 'pure'
gemspec :name => 'json_pure'
end
Expand Down
4 changes: 2 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'

desc "Package the jruby gem"
task :jruby_gem => :create_jar do
sh 'gem build json-java.gemspec'
sh 'gem build json.gemspec'
mkdir_p 'pkg'
mv "json-#{PKG_VERSION}-java.gem", 'pkg'
mv "json-#{PKG_VERSION}.gem", 'pkg'
end

desc "Testing library (jruby)"
Expand Down
62 changes: 43 additions & 19 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
i_pack, i_unpack, i_create_id, i_extend, i_key_p,
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
i_buffer_initial_length, i_dup, i_escape_slash;
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash;

/*
* Copyright 2001-2004 Unicode, Inc.
Expand Down Expand Up @@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16

/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
* and control characters are JSON escaped. */
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash)
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
{
const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
Expand Down Expand Up @@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
fbuffer_append(buffer, "\\\"", 2);
break;
case '/':
if(escape_slash) {
if(script_safe) {
fbuffer_append(buffer, "\\/", 2);
break;
}
Expand Down Expand Up @@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
* characters required by the JSON standard are JSON escaped. The remaining
* characters (should be UTF8) are just passed through and appended to the
* result. */
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash)
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
{
const char *ptr = RSTRING_PTR(string), *p;
unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
Expand Down Expand Up @@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
escape_len = 2;
break;
case '/':
if(escape_slash) {
if(script_safe) {
escape = "\\/";
escape_len = 2;
break;
Expand All @@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
rb_raise(rb_path2class("JSON::GeneratorError"),
"partial character in source, but hit end");
}

if (script_safe && c == 0xE2) {
unsigned char c2 = (unsigned char) *(p+1);
unsigned char c3 = (unsigned char) *(p+2);
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
fbuffer_append(buffer, ptr + start, end - start);
start = end = (end + clen);
if (c3 == 0xA8) {
fbuffer_append(buffer, "\\u2028", 6);
} else {
fbuffer_append(buffer, "\\u2029", 6);
}
continue;
}
}

if (!isLegalUTF8((UTF8 *) p, clen)) {
rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
Expand Down Expand Up @@ -726,8 +742,12 @@ static VALUE cState_configure(VALUE self, VALUE opts)
state->allow_nan = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
state->ascii_only = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
state->escape_slash = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
state->script_safe = RTEST(tmp);
if (!state->script_safe) {
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
state->script_safe = RTEST(tmp);
}
return self;
}

Expand Down Expand Up @@ -762,7 +782,7 @@ static VALUE cState_to_h(VALUE self)
rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
return result;
Expand Down Expand Up @@ -947,9 +967,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
}
#endif
if (state->ascii_only) {
convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash);
convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
} else {
convert_UTF8_to_JSON(buffer, obj, state->escape_slash);
convert_UTF8_to_JSON(buffer, obj, state->script_safe);
}
fbuffer_append_char(buffer, '"');
}
Expand Down Expand Up @@ -1390,27 +1410,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
}

/*
* call-seq: escape_slash
* call-seq: script_safe
*
* If this boolean is true, the forward slashes will be escaped in
* the json output.
*/
static VALUE cState_escape_slash(VALUE self)
static VALUE cState_script_safe(VALUE self)
{
GET_STATE(self);
return state->escape_slash ? Qtrue : Qfalse;
return state->script_safe ? Qtrue : Qfalse;
}

/*
* call-seq: escape_slash=(depth)
* call-seq: script_safe=(depth)
*
* This sets whether or not the forward slashes will be escaped in
* the json output.
*/
static VALUE cState_escape_slash_set(VALUE self, VALUE enable)
static VALUE cState_script_safe_set(VALUE self, VALUE enable)
{
GET_STATE(self);
state->escape_slash = RTEST(enable);
state->script_safe = RTEST(enable);
return Qnil;
}

Expand Down Expand Up @@ -1530,9 +1550,12 @@ void Init_generator(void)
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
rb_define_method(cState, "escape_slash", cState_escape_slash, 0);
rb_define_method(cState, "escape_slash?", cState_escape_slash, 0);
rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1);
rb_define_method(cState, "script_safe", cState_script_safe, 0);
rb_define_method(cState, "script_safe?", cState_script_safe, 0);
rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
rb_define_alias(cState, "escape_slash", "script_safe");
rb_define_alias(cState, "escape_slash?", "script_safe?");
rb_define_alias(cState, "escape_slash=", "script_safe=");
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
Expand Down Expand Up @@ -1589,6 +1612,7 @@ void Init_generator(void)
i_object_nl = rb_intern("object_nl");
i_array_nl = rb_intern("array_nl");
i_max_nesting = rb_intern("max_nesting");
i_script_safe = rb_intern("script_safe");
i_escape_slash = rb_intern("escape_slash");
i_allow_nan = rb_intern("allow_nan");
i_ascii_only = rb_intern("ascii_only");
Expand Down
10 changes: 5 additions & 5 deletions ext/json/ext/generator/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ static const UTF32 halfMask = 0x3FFUL;
static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
static void unicode_escape(char *buf, UTF16 character);
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash);
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash);
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe);
static char *fstrndup(const char *ptr, unsigned long len);

/* ruby api and some helpers */
Expand All @@ -72,7 +72,7 @@ typedef struct JSON_Generator_StateStruct {
long max_nesting;
char allow_nan;
char ascii_only;
char escape_slash;
char script_safe;
long depth;
long buffer_initial_length;
} JSON_Generator_State;
Expand Down Expand Up @@ -151,8 +151,8 @@ static VALUE cState_allow_nan_p(VALUE self);
static VALUE cState_ascii_only_p(VALUE self);
static VALUE cState_depth(VALUE self);
static VALUE cState_depth_set(VALUE self, VALUE depth);
static VALUE cState_escape_slash(VALUE self);
static VALUE cState_escape_slash_set(VALUE self, VALUE depth);
static VALUE cState_script_safe(VALUE self);
static VALUE cState_script_safe_set(VALUE self, VALUE depth);
static FBuffer *cState_prepare_buffer(VALUE self);
#ifndef ZALLOC
#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type)))
Expand Down
2 changes: 1 addition & 1 deletion java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public RuntimeInfo getInfo() {

public StringEncoder getStringEncoder() {
if (stringEncoder == null) {
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().escapeSlash());
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().scriptSafe());
}
return stringEncoder;
}
Expand Down
42 changes: 25 additions & 17 deletions java/src/json/ext/GeneratorState.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ public class GeneratorState extends RubyObject {
* If set to <code>true</code> the forward slash will be escaped in
* json output.
*/
private boolean escapeSlash = DEFAULT_ESCAPE_SLASH;
static final boolean DEFAULT_ESCAPE_SLASH = false;
private boolean scriptSafe = DEFAULT_SCRIPT_SAFE;
static final boolean DEFAULT_SCRIPT_SAFE = false;
/**
* The initial buffer length of this state. (This isn't really used on all
* non-C implementations.)
Expand Down Expand Up @@ -177,9 +177,9 @@ static GeneratorState fromState(ThreadContext context, RuntimeInfo info,
* <code>-Infinity</code> should be generated, otherwise an exception is
* thrown if these values are encountered.
* This options defaults to <code>false</code>.
* <dt><code>:escape_slash</code>
* <dd>set to <code>true</code> if the forward slashes should be escaped
* in the json output (default: <code>false</code>)
* <dt><code>:script_safe</code>
* <dd>set to <code>true</code> if U+2028, U+2029 and forward slashes should be escaped
* in the json output to make it safe to include in a JavaScript tag (default: <code>false</code>)
*/
@JRubyMethod(optional=1, visibility=Visibility.PRIVATE)
public IRubyObject initialize(ThreadContext context, IRubyObject[] args) {
Expand All @@ -203,7 +203,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) {
this.allowNaN = orig.allowNaN;
this.asciiOnly = orig.asciiOnly;
this.quirksMode = orig.quirksMode;
this.escapeSlash = orig.escapeSlash;
this.scriptSafe = orig.scriptSafe;
this.bufferInitialLength = orig.bufferInitialLength;
this.depth = orig.depth;
return this;
Expand Down Expand Up @@ -359,19 +359,24 @@ public IRubyObject max_nesting_set(IRubyObject max_nesting) {
/**
* Returns true if forward slashes are escaped in the json output.
*/
public boolean escapeSlash() {
return escapeSlash;
public boolean scriptSafe() {
return scriptSafe;
}

@JRubyMethod(name="escape_slash")
public RubyBoolean escape_slash_get(ThreadContext context) {
return context.getRuntime().newBoolean(escapeSlash);
@JRubyMethod(name="script_safe", alias="escape_slash")
public RubyBoolean script_safe_get(ThreadContext context) {
return context.getRuntime().newBoolean(scriptSafe);
}

@JRubyMethod(name="escape_slash=")
public IRubyObject escape_slash_set(IRubyObject escape_slash) {
escapeSlash = escape_slash.isTrue();
return escape_slash.getRuntime().newBoolean(escapeSlash);
@JRubyMethod(name="script_safe=", alias="escape_slash=")
public IRubyObject script_safe_set(IRubyObject script_safe) {
scriptSafe = script_safe.isTrue();
return script_safe.getRuntime().newBoolean(scriptSafe);
}

@JRubyMethod(name="script_safe?", alias="escape_slash?")
public RubyBoolean script_safe_p(ThreadContext context) {
return context.getRuntime().newBoolean(scriptSafe);
}

public boolean allowNaN() {
Expand Down Expand Up @@ -458,7 +463,10 @@ public IRubyObject configure(ThreadContext context, IRubyObject vOpts) {
maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING);
allowNaN = opts.getBool("allow_nan", DEFAULT_ALLOW_NAN);
asciiOnly = opts.getBool("ascii_only", DEFAULT_ASCII_ONLY);
escapeSlash = opts.getBool("escape_slash", DEFAULT_ESCAPE_SLASH);
scriptSafe = opts.getBool("script_safe", DEFAULT_SCRIPT_SAFE);
if (!scriptSafe) {
scriptSafe = opts.getBool("escape_slash", DEFAULT_SCRIPT_SAFE);
}
bufferInitialLength = opts.getInt("buffer_initial_length", DEFAULT_BUFFER_INITIAL_LENGTH);

depth = opts.getInt("depth", 0);
Expand Down Expand Up @@ -486,7 +494,7 @@ public RubyHash to_h(ThreadContext context) {
result.op_aset(context, runtime.newSymbol("allow_nan"), allow_nan_p(context));
result.op_aset(context, runtime.newSymbol("ascii_only"), ascii_only_p(context));
result.op_aset(context, runtime.newSymbol("max_nesting"), max_nesting_get(context));
result.op_aset(context, runtime.newSymbol("escape_slash"), escape_slash_get(context));
result.op_aset(context, runtime.newSymbol("script_safe"), script_safe_get(context));
result.op_aset(context, runtime.newSymbol("depth"), depth_get(context));
result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context));
for (String name: getInstanceVariableNameList()) {
Expand Down
15 changes: 11 additions & 4 deletions java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* and throws a GeneratorError if any problem is found.
*/
final class StringEncoder extends ByteListTranscoder {
private final boolean asciiOnly, escapeSlash;
private final boolean asciiOnly, scriptSafe;

// Escaped characters will reuse this array, to avoid new allocations
// or appending them byte-by-byte
Expand All @@ -37,10 +37,10 @@ final class StringEncoder extends ByteListTranscoder {
new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

StringEncoder(ThreadContext context, boolean asciiOnly, boolean escapeSlash) {
StringEncoder(ThreadContext context, boolean asciiOnly, boolean scriptSafe) {
super(context);
this.asciiOnly = asciiOnly;
this.escapeSlash = escapeSlash;
this.scriptSafe = scriptSafe;
}

void encode(ByteList src, ByteList out) {
Expand Down Expand Up @@ -75,10 +75,17 @@ private void handleChar(int c) {
escapeChar('b');
break;
case '/':
if(escapeSlash) {
if(scriptSafe) {
escapeChar((char)c);
break;
}
case 0x2028:
case 0x2029:
if (scriptSafe) {
quoteStop(charStart);
escapeUtf8Char(c);
break;
}
default:
if (c >= 0x20 && c <= 0x7f ||
(c >= 0x80 && !asciiOnly)) {
Expand Down
34 changes: 0 additions & 34 deletions json-java.gemspec

This file was deleted.

0 comments on commit 637fc52

Please sign in to comment.