Skip to content

Commit

Permalink
Merge pull request #525 from Shopify/script-safe-option
Browse files Browse the repository at this point in the history
Rename escape_slash in script_safe and also escape E+2028 and E+2029
  • Loading branch information
hsbt committed Dec 1, 2023
2 parents 123032c + 29e5ccf commit c065433
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 65 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Changes

* `escape_slash` option was renamed as `script_safe` and now also escape U+2028 and U+2029. `escape_slash` is now an alias of `script_safe`.

### 2021-10-24 (2.6.1)

* Restore version.rb with 2.6.1
Expand Down
62 changes: 43 additions & 19 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
i_pack, i_unpack, i_create_id, i_extend, i_key_p,
i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
i_buffer_initial_length, i_dup, i_escape_slash;
i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash;

/*
* Copyright 2001-2004 Unicode, Inc.
Expand Down Expand Up @@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16

/* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
* and control characters are JSON escaped. */
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash)
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
{
const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
const UTF8 *sourceEnd = source + RSTRING_LEN(string);
Expand Down Expand Up @@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
fbuffer_append(buffer, "\\\"", 2);
break;
case '/':
if(escape_slash) {
if(script_safe) {
fbuffer_append(buffer, "\\/", 2);
break;
}
Expand Down Expand Up @@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap
* characters required by the JSON standard are JSON escaped. The remaining
* characters (should be UTF8) are just passed through and appended to the
* result. */
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash)
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
{
const char *ptr = RSTRING_PTR(string), *p;
unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
Expand Down Expand Up @@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
escape_len = 2;
break;
case '/':
if(escape_slash) {
if(script_safe) {
escape = "\\/";
escape_len = 2;
break;
Expand All @@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas
rb_raise(rb_path2class("JSON::GeneratorError"),
"partial character in source, but hit end");
}

if (script_safe && c == 0xE2) {
unsigned char c2 = (unsigned char) *(p+1);
unsigned char c3 = (unsigned char) *(p+2);
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
fbuffer_append(buffer, ptr + start, end - start);
start = end = (end + clen);
if (c3 == 0xA8) {
fbuffer_append(buffer, "\\u2028", 6);
} else {
fbuffer_append(buffer, "\\u2029", 6);
}
continue;
}
}

if (!isLegalUTF8((UTF8 *) p, clen)) {
rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
Expand Down Expand Up @@ -727,8 +743,12 @@ static VALUE cState_configure(VALUE self, VALUE opts)
state->allow_nan = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
state->ascii_only = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
state->escape_slash = RTEST(tmp);
tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
state->script_safe = RTEST(tmp);
if (!state->script_safe) {
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
state->script_safe = RTEST(tmp);
}
return self;
}

Expand Down Expand Up @@ -763,7 +783,7 @@ static VALUE cState_to_h(VALUE self)
rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
return result;
Expand Down Expand Up @@ -948,9 +968,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
}
#endif
if (state->ascii_only) {
convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash);
convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
} else {
convert_UTF8_to_JSON(buffer, obj, state->escape_slash);
convert_UTF8_to_JSON(buffer, obj, state->script_safe);
}
fbuffer_append_char(buffer, '"');
}
Expand Down Expand Up @@ -1391,27 +1411,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
}

/*
* call-seq: escape_slash
* call-seq: script_safe
*
* If this boolean is true, the forward slashes will be escaped in
* the json output.
*/
static VALUE cState_escape_slash(VALUE self)
static VALUE cState_script_safe(VALUE self)
{
GET_STATE(self);
return state->escape_slash ? Qtrue : Qfalse;
return state->script_safe ? Qtrue : Qfalse;
}

/*
* call-seq: escape_slash=(depth)
* call-seq: script_safe=(depth)
*
* This sets whether or not the forward slashes will be escaped in
* the json output.
*/
static VALUE cState_escape_slash_set(VALUE self, VALUE enable)
static VALUE cState_script_safe_set(VALUE self, VALUE enable)
{
GET_STATE(self);
state->escape_slash = RTEST(enable);
state->script_safe = RTEST(enable);
return Qnil;
}

Expand Down Expand Up @@ -1531,9 +1551,12 @@ void Init_generator(void)
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
rb_define_method(cState, "escape_slash", cState_escape_slash, 0);
rb_define_method(cState, "escape_slash?", cState_escape_slash, 0);
rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1);
rb_define_method(cState, "script_safe", cState_script_safe, 0);
rb_define_method(cState, "script_safe?", cState_script_safe, 0);
rb_define_method(cState, "script_safe=", cState_script_safe_set, 1);
rb_define_alias(cState, "escape_slash", "script_safe");
rb_define_alias(cState, "escape_slash?", "script_safe?");
rb_define_alias(cState, "escape_slash=", "script_safe=");
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
Expand Down Expand Up @@ -1590,6 +1613,7 @@ void Init_generator(void)
i_object_nl = rb_intern("object_nl");
i_array_nl = rb_intern("array_nl");
i_max_nesting = rb_intern("max_nesting");
i_script_safe = rb_intern("script_safe");
i_escape_slash = rb_intern("escape_slash");
i_allow_nan = rb_intern("allow_nan");
i_ascii_only = rb_intern("ascii_only");
Expand Down
10 changes: 5 additions & 5 deletions ext/json/ext/generator/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ static const UTF32 halfMask = 0x3FFUL;
static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length);
static void unicode_escape(char *buf, UTF16 character);
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character);
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash);
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash);
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe);
static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe);
static char *fstrndup(const char *ptr, unsigned long len);

/* ruby api and some helpers */
Expand All @@ -72,7 +72,7 @@ typedef struct JSON_Generator_StateStruct {
long max_nesting;
char allow_nan;
char ascii_only;
char escape_slash;
char script_safe;
long depth;
long buffer_initial_length;
} JSON_Generator_State;
Expand Down Expand Up @@ -151,8 +151,8 @@ static VALUE cState_allow_nan_p(VALUE self);
static VALUE cState_ascii_only_p(VALUE self);
static VALUE cState_depth(VALUE self);
static VALUE cState_depth_set(VALUE self, VALUE depth);
static VALUE cState_escape_slash(VALUE self);
static VALUE cState_escape_slash_set(VALUE self, VALUE depth);
static VALUE cState_script_safe(VALUE self);
static VALUE cState_script_safe_set(VALUE self, VALUE depth);
static FBuffer *cState_prepare_buffer(VALUE self);
#ifndef ZALLOC
#define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type)))
Expand Down
2 changes: 1 addition & 1 deletion java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public RuntimeInfo getInfo() {

public StringEncoder getStringEncoder() {
if (stringEncoder == null) {
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().escapeSlash());
stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().scriptSafe());
}
return stringEncoder;
}
Expand Down
42 changes: 25 additions & 17 deletions java/src/json/ext/GeneratorState.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ public class GeneratorState extends RubyObject {
* If set to <code>true</code> the forward slash will be escaped in
* json output.
*/
private boolean escapeSlash = DEFAULT_ESCAPE_SLASH;
static final boolean DEFAULT_ESCAPE_SLASH = false;
private boolean scriptSafe = DEFAULT_SCRIPT_SAFE;
static final boolean DEFAULT_SCRIPT_SAFE = false;
/**
* The initial buffer length of this state. (This isn't really used on all
* non-C implementations.)
Expand Down Expand Up @@ -177,9 +177,9 @@ static GeneratorState fromState(ThreadContext context, RuntimeInfo info,
* <code>-Infinity</code> should be generated, otherwise an exception is
* thrown if these values are encountered.
* This options defaults to <code>false</code>.
* <dt><code>:escape_slash</code>
* <dd>set to <code>true</code> if the forward slashes should be escaped
* in the json output (default: <code>false</code>)
* <dt><code>:script_safe</code>
* <dd>set to <code>true</code> if U+2028, U+2029 and forward slashes should be escaped
* in the json output to make it safe to include in a JavaScript tag (default: <code>false</code>)
*/
@JRubyMethod(optional=1, visibility=Visibility.PRIVATE)
public IRubyObject initialize(ThreadContext context, IRubyObject[] args) {
Expand All @@ -203,7 +203,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) {
this.allowNaN = orig.allowNaN;
this.asciiOnly = orig.asciiOnly;
this.quirksMode = orig.quirksMode;
this.escapeSlash = orig.escapeSlash;
this.scriptSafe = orig.scriptSafe;
this.bufferInitialLength = orig.bufferInitialLength;
this.depth = orig.depth;
return this;
Expand Down Expand Up @@ -359,19 +359,24 @@ public IRubyObject max_nesting_set(IRubyObject max_nesting) {
/**
* Returns true if forward slashes are escaped in the json output.
*/
public boolean escapeSlash() {
return escapeSlash;
public boolean scriptSafe() {
return scriptSafe;
}

@JRubyMethod(name="escape_slash")
public RubyBoolean escape_slash_get(ThreadContext context) {
return context.getRuntime().newBoolean(escapeSlash);
@JRubyMethod(name="script_safe", alias="escape_slash")
public RubyBoolean script_safe_get(ThreadContext context) {
return context.getRuntime().newBoolean(scriptSafe);
}

@JRubyMethod(name="escape_slash=")
public IRubyObject escape_slash_set(IRubyObject escape_slash) {
escapeSlash = escape_slash.isTrue();
return escape_slash.getRuntime().newBoolean(escapeSlash);
@JRubyMethod(name="script_safe=", alias="escape_slash=")
public IRubyObject script_safe_set(IRubyObject script_safe) {
scriptSafe = script_safe.isTrue();
return script_safe.getRuntime().newBoolean(scriptSafe);
}

@JRubyMethod(name="script_safe?", alias="escape_slash?")
public RubyBoolean script_safe_p(ThreadContext context) {
return context.getRuntime().newBoolean(scriptSafe);
}

public boolean allowNaN() {
Expand Down Expand Up @@ -458,7 +463,10 @@ public IRubyObject configure(ThreadContext context, IRubyObject vOpts) {
maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING);
allowNaN = opts.getBool("allow_nan", DEFAULT_ALLOW_NAN);
asciiOnly = opts.getBool("ascii_only", DEFAULT_ASCII_ONLY);
escapeSlash = opts.getBool("escape_slash", DEFAULT_ESCAPE_SLASH);
scriptSafe = opts.getBool("script_safe", DEFAULT_SCRIPT_SAFE);
if (!scriptSafe) {
scriptSafe = opts.getBool("escape_slash", DEFAULT_SCRIPT_SAFE);
}
bufferInitialLength = opts.getInt("buffer_initial_length", DEFAULT_BUFFER_INITIAL_LENGTH);

depth = opts.getInt("depth", 0);
Expand Down Expand Up @@ -486,7 +494,7 @@ public RubyHash to_h(ThreadContext context) {
result.op_aset(context, runtime.newSymbol("allow_nan"), allow_nan_p(context));
result.op_aset(context, runtime.newSymbol("ascii_only"), ascii_only_p(context));
result.op_aset(context, runtime.newSymbol("max_nesting"), max_nesting_get(context));
result.op_aset(context, runtime.newSymbol("escape_slash"), escape_slash_get(context));
result.op_aset(context, runtime.newSymbol("script_safe"), script_safe_get(context));
result.op_aset(context, runtime.newSymbol("depth"), depth_get(context));
result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context));
for (String name: getInstanceVariableNameList()) {
Expand Down
15 changes: 11 additions & 4 deletions java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* and throws a GeneratorError if any problem is found.
*/
final class StringEncoder extends ByteListTranscoder {
private final boolean asciiOnly, escapeSlash;
private final boolean asciiOnly, scriptSafe;

// Escaped characters will reuse this array, to avoid new allocations
// or appending them byte-by-byte
Expand All @@ -37,10 +37,10 @@ final class StringEncoder extends ByteListTranscoder {
new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

StringEncoder(ThreadContext context, boolean asciiOnly, boolean escapeSlash) {
StringEncoder(ThreadContext context, boolean asciiOnly, boolean scriptSafe) {
super(context);
this.asciiOnly = asciiOnly;
this.escapeSlash = escapeSlash;
this.scriptSafe = scriptSafe;
}

void encode(ByteList src, ByteList out) {
Expand Down Expand Up @@ -75,10 +75,17 @@ private void handleChar(int c) {
escapeChar('b');
break;
case '/':
if(escapeSlash) {
if(scriptSafe) {
escapeChar((char)c);
break;
}
case 0x2028:
case 0x2029:
if (scriptSafe) {
quoteStop(charStart);
escapeUtf8Char(c);
break;
}
default:
if (c >= 0x20 && c <= 0x7f ||
(c >= 0x80 && !asciiOnly)) {
Expand Down
9 changes: 9 additions & 0 deletions lib/json.rb
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,15 @@
# # Raises JSON::NestingError (nesting of 2 is too deep):
# JSON.generate(obj, max_nesting: 2)
#
# ====== Escaping Options
#
# Options +script_safe+ (boolean) specifies wether <tt>'\u2028'</tt>, <tt>'\u2029'</tt>
# and <tt>'/'</tt> should be escaped as to make the JSON object safe to interpolate in script
# tags.
#
# Options +ascii_only+ (boolean) specifies wether all characters outside the ASCII range
# should be escaped.
#
# ====== Output Options
#
# The default formatting options generate the most compact
Expand Down
4 changes: 2 additions & 2 deletions lib/json/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -572,13 +572,13 @@ class << self
# Sets or returns the default options for the JSON.dump method.
# Initially:
# opts = JSON.dump_default_options
# opts # => {:max_nesting=>false, :allow_nan=>true, :escape_slash=>false}
# opts # => {:max_nesting=>false, :allow_nan=>true, :script_safe=>false}
attr_accessor :dump_default_options
end
self.dump_default_options = {
:max_nesting => false,
:allow_nan => true,
:escape_slash => false,
:script_safe => false,
}

# :call-seq:
Expand Down
Loading

0 comments on commit c065433

Please sign in to comment.