diff --git a/CHANGES.md b/CHANGES.md index 11e3632a..11fe4c9e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ # Changes +* The `escape_slash` option was renamed to `script_safe` and now also escapes U+2028 and U+2029. `escape_slash` is now an alias of `script_safe`. + ### 2021-10-24 (2.6.1) * Restore version.rb with 2.6.1 diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 98d0ea46..70d6fbe6 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -16,7 +16,7 @@ static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before, i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only, i_pack, i_unpack, i_create_id, i_extend, i_key_p, i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth, - i_buffer_initial_length, i_dup, i_escape_slash; + i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash; /* * Copyright 2001-2004 Unicode, Inc. @@ -124,7 +124,7 @@ static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII * and control characters are JSON escaped. */ -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash) +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe) { const UTF8 *source = (UTF8 *) RSTRING_PTR(string); const UTF8 *sourceEnd = source + RSTRING_LEN(string); @@ -175,7 +175,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap fbuffer_append(buffer, "\\\"", 2); break; case '/': - if(escape_slash) { + if(script_safe) { fbuffer_append(buffer, "\\/", 2); break; } @@ -228,7 +228,7 @@ static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escap * characters required by the JSON standard are JSON escaped. The remaining * characters (should be UTF8) are just passed through and appended to the * result. 
*/ -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash) +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe) { const char *ptr = RSTRING_PTR(string), *p; unsigned long len = RSTRING_LEN(string), start = 0, end = 0; @@ -280,7 +280,7 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas escape_len = 2; break; case '/': - if(escape_slash) { + if(script_safe) { escape = "\\/"; escape_len = 2; break; @@ -294,6 +294,22 @@ static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slas rb_raise(rb_path2class("JSON::GeneratorError"), "partial character in source, but hit end"); } + + if (script_safe && c == 0xE2) { + unsigned char c2 = (unsigned char) *(p+1); + unsigned char c3 = (unsigned char) *(p+2); + if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) { + fbuffer_append(buffer, ptr + start, end - start); + start = end = (end + clen); + if (c3 == 0xA8) { + fbuffer_append(buffer, "\\u2028", 6); + } else { + fbuffer_append(buffer, "\\u2029", 6); + } + continue; + } + } + if (!isLegalUTF8((UTF8 *) p, clen)) { rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8"); @@ -726,8 +742,12 @@ static VALUE cState_configure(VALUE self, VALUE opts) state->allow_nan = RTEST(tmp); tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only)); state->ascii_only = RTEST(tmp); - tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash)); - state->escape_slash = RTEST(tmp); + tmp = rb_hash_aref(opts, ID2SYM(i_script_safe)); + state->script_safe = RTEST(tmp); + if (!state->script_safe) { + tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash)); + state->script_safe = RTEST(tmp); + } return self; } @@ -762,7 +782,7 @@ static VALUE cState_to_h(VALUE self) rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse); rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? 
Qtrue : Qfalse); rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting)); - rb_hash_aset(result, ID2SYM(i_escape_slash), state->escape_slash ? Qtrue : Qfalse); + rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse); rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth)); rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length)); return result; @@ -947,9 +967,9 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S } #endif if (state->ascii_only) { - convert_UTF8_to_JSON_ASCII(buffer, obj, state->escape_slash); + convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe); } else { - convert_UTF8_to_JSON(buffer, obj, state->escape_slash); + convert_UTF8_to_JSON(buffer, obj, state->script_safe); } fbuffer_append_char(buffer, '"'); } @@ -1390,27 +1410,27 @@ static VALUE cState_max_nesting_set(VALUE self, VALUE depth) } /* - * call-seq: escape_slash + * call-seq: script_safe * * If this boolean is true, the forward slashes will be escaped in * the json output. */ -static VALUE cState_escape_slash(VALUE self) +static VALUE cState_script_safe(VALUE self) { GET_STATE(self); - return state->escape_slash ? Qtrue : Qfalse; + return state->script_safe ? Qtrue : Qfalse; } /* - * call-seq: escape_slash=(depth) + * call-seq: script_safe=(enable) * * This sets whether or not the forward slashes will be escaped in * the json output. 
*/ -static VALUE cState_escape_slash_set(VALUE self, VALUE enable) +static VALUE cState_script_safe_set(VALUE self, VALUE enable) { GET_STATE(self); - state->escape_slash = RTEST(enable); + state->script_safe = RTEST(enable); return Qnil; } @@ -1530,9 +1550,12 @@ void Init_generator(void) rb_define_method(cState, "array_nl=", cState_array_nl_set, 1); rb_define_method(cState, "max_nesting", cState_max_nesting, 0); rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1); - rb_define_method(cState, "escape_slash", cState_escape_slash, 0); - rb_define_method(cState, "escape_slash?", cState_escape_slash, 0); - rb_define_method(cState, "escape_slash=", cState_escape_slash_set, 1); + rb_define_method(cState, "script_safe", cState_script_safe, 0); + rb_define_method(cState, "script_safe?", cState_script_safe, 0); + rb_define_method(cState, "script_safe=", cState_script_safe_set, 1); + rb_define_alias(cState, "escape_slash", "script_safe"); + rb_define_alias(cState, "escape_slash?", "script_safe?"); + rb_define_alias(cState, "escape_slash=", "script_safe="); rb_define_method(cState, "check_circular?", cState_check_circular_p, 0); rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0); rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0); @@ -1589,6 +1612,7 @@ void Init_generator(void) i_object_nl = rb_intern("object_nl"); i_array_nl = rb_intern("array_nl"); i_max_nesting = rb_intern("max_nesting"); + i_script_safe = rb_intern("script_safe"); i_escape_slash = rb_intern("escape_slash"); i_allow_nan = rb_intern("allow_nan"); i_ascii_only = rb_intern("ascii_only"); diff --git a/ext/json/ext/generator/generator.h b/ext/json/ext/generator/generator.h index 3ebd6225..5e6a2280 100644 --- a/ext/json/ext/generator/generator.h +++ b/ext/json/ext/generator/generator.h @@ -49,8 +49,8 @@ static const UTF32 halfMask = 0x3FFUL; static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length); static void unicode_escape(char *buf, UTF16 character); 
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16 character); -static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char escape_slash); -static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char escape_slash); +static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe); +static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe); static char *fstrndup(const char *ptr, unsigned long len); /* ruby api and some helpers */ @@ -72,7 +72,7 @@ typedef struct JSON_Generator_StateStruct { long max_nesting; char allow_nan; char ascii_only; - char escape_slash; + char script_safe; long depth; long buffer_initial_length; } JSON_Generator_State; @@ -151,8 +151,8 @@ static VALUE cState_allow_nan_p(VALUE self); static VALUE cState_ascii_only_p(VALUE self); static VALUE cState_depth(VALUE self); static VALUE cState_depth_set(VALUE self, VALUE depth); -static VALUE cState_escape_slash(VALUE self); -static VALUE cState_escape_slash_set(VALUE self, VALUE depth); +static VALUE cState_script_safe(VALUE self); +static VALUE cState_script_safe_set(VALUE self, VALUE depth); static FBuffer *cState_prepare_buffer(VALUE self); #ifndef ZALLOC #define ZALLOC(type) ((type *)ruby_zalloc(sizeof(type))) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 6a996868..ccbcd761 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -139,7 +139,7 @@ public RuntimeInfo getInfo() { public StringEncoder getStringEncoder() { if (stringEncoder == null) { - stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().escapeSlash()); + stringEncoder = new StringEncoder(context, getState().asciiOnly(), getState().scriptSafe()); } return stringEncoder; } diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index a0541d67..ac464b51 100644 --- a/java/src/json/ext/GeneratorState.java +++ 
b/java/src/json/ext/GeneratorState.java @@ -86,8 +86,8 @@ public class GeneratorState extends RubyObject { * If set to true the forward slash will be escaped in * json output. */ - private boolean escapeSlash = DEFAULT_ESCAPE_SLASH; - static final boolean DEFAULT_ESCAPE_SLASH = false; + private boolean scriptSafe = DEFAULT_SCRIPT_SAFE; + static final boolean DEFAULT_SCRIPT_SAFE = false; /** * The initial buffer length of this state. (This isn't really used on all * non-C implementations.) @@ -177,9 +177,9 @@ static GeneratorState fromState(ThreadContext context, RuntimeInfo info, * -Infinity should be generated, otherwise an exception is * thrown if these values are encountered. * This options defaults to false. - *
:escape_slash - *
set to true if the forward slashes should be escaped - * in the json output (default: false) + *
:script_safe + *
set to true if U+2028, U+2029 and forward slashes should be escaped + * in the json output to make it safe to include in a JavaScript tag (default: false) */ @JRubyMethod(optional=1, visibility=Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { @@ -203,7 +203,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) { this.allowNaN = orig.allowNaN; this.asciiOnly = orig.asciiOnly; this.quirksMode = orig.quirksMode; - this.escapeSlash = orig.escapeSlash; + this.scriptSafe = orig.scriptSafe; this.bufferInitialLength = orig.bufferInitialLength; this.depth = orig.depth; return this; @@ -359,19 +359,24 @@ public IRubyObject max_nesting_set(IRubyObject max_nesting) { /** * Returns true if forward slashes are escaped in the json output. */ - public boolean escapeSlash() { - return escapeSlash; + public boolean scriptSafe() { + return scriptSafe; } - @JRubyMethod(name="escape_slash") - public RubyBoolean escape_slash_get(ThreadContext context) { - return context.getRuntime().newBoolean(escapeSlash); + @JRubyMethod(name="script_safe", alias="escape_slash") + public RubyBoolean script_safe_get(ThreadContext context) { + return context.getRuntime().newBoolean(scriptSafe); } - @JRubyMethod(name="escape_slash=") - public IRubyObject escape_slash_set(IRubyObject escape_slash) { - escapeSlash = escape_slash.isTrue(); - return escape_slash.getRuntime().newBoolean(escapeSlash); + @JRubyMethod(name="script_safe=", alias="escape_slash=") + public IRubyObject script_safe_set(IRubyObject script_safe) { + scriptSafe = script_safe.isTrue(); + return script_safe.getRuntime().newBoolean(scriptSafe); + } + + @JRubyMethod(name="script_safe?", alias="escape_slash?") + public RubyBoolean script_safe_p(ThreadContext context) { + return context.getRuntime().newBoolean(scriptSafe); } public boolean allowNaN() { @@ -458,7 +463,10 @@ public IRubyObject configure(ThreadContext context, IRubyObject vOpts) { maxNesting = 
opts.getInt("max_nesting", DEFAULT_MAX_NESTING); allowNaN = opts.getBool("allow_nan", DEFAULT_ALLOW_NAN); asciiOnly = opts.getBool("ascii_only", DEFAULT_ASCII_ONLY); - escapeSlash = opts.getBool("escape_slash", DEFAULT_ESCAPE_SLASH); + scriptSafe = opts.getBool("script_safe", DEFAULT_SCRIPT_SAFE); + if (!scriptSafe) { + scriptSafe = opts.getBool("escape_slash", DEFAULT_SCRIPT_SAFE); + } bufferInitialLength = opts.getInt("buffer_initial_length", DEFAULT_BUFFER_INITIAL_LENGTH); depth = opts.getInt("depth", 0); @@ -486,7 +494,7 @@ public RubyHash to_h(ThreadContext context) { result.op_aset(context, runtime.newSymbol("allow_nan"), allow_nan_p(context)); result.op_aset(context, runtime.newSymbol("ascii_only"), ascii_only_p(context)); result.op_aset(context, runtime.newSymbol("max_nesting"), max_nesting_get(context)); - result.op_aset(context, runtime.newSymbol("escape_slash"), escape_slash_get(context)); + result.op_aset(context, runtime.newSymbol("script_safe"), script_safe_get(context)); result.op_aset(context, runtime.newSymbol("depth"), depth_get(context)); result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context)); for (String name: getInstanceVariableNameList()) { diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 26678ede..290aa249 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -15,7 +15,7 @@ * and throws a GeneratorError if any problem is found. 
*/ final class StringEncoder extends ByteListTranscoder { - private final boolean asciiOnly, escapeSlash; + private final boolean asciiOnly, scriptSafe; // Escaped characters will reuse this array, to avoid new allocations // or appending them byte-by-byte @@ -37,10 +37,10 @@ final class StringEncoder extends ByteListTranscoder { new byte[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - StringEncoder(ThreadContext context, boolean asciiOnly, boolean escapeSlash) { + StringEncoder(ThreadContext context, boolean asciiOnly, boolean scriptSafe) { super(context); this.asciiOnly = asciiOnly; - this.escapeSlash = escapeSlash; + this.scriptSafe = scriptSafe; } void encode(ByteList src, ByteList out) { @@ -75,10 +75,17 @@ private void handleChar(int c) { escapeChar('b'); break; case '/': - if(escapeSlash) { + if(scriptSafe) { escapeChar((char)c); break; } + case 0x2028: + case 0x2029: + if (scriptSafe) { + quoteStop(charStart); + escapeUtf8Char(c); + break; + } default: if (c >= 0x20 && c <= 0x7f || (c >= 0x80 && !asciiOnly)) { diff --git a/lib/json.rb b/lib/json.rb index 1e64bfcb..807488ff 100644 --- a/lib/json.rb +++ b/lib/json.rb @@ -285,6 +285,15 @@ # # Raises JSON::NestingError (nesting of 2 is too deep): # JSON.generate(obj, max_nesting: 2) # +# ====== Escaping Options +# +# Option +script_safe+ (boolean) specifies whether '\u2028', '\u2029' +# and '/' should be escaped so as to make the JSON object safe to interpolate in script +# tags. +# +# Option +ascii_only+ (boolean) specifies whether all characters outside the ASCII range +# should be escaped. +# # ====== Output Options # # The default formatting options generate the most compact diff --git a/lib/json/common.rb b/lib/json/common.rb index ea46896f..29cafdf0 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -592,13 +592,13 @@ class << self # Sets or returns the default options for the JSON.dump method. 
# Initially: # opts = JSON.dump_default_options - # opts # => {:max_nesting=>false, :allow_nan=>true, :escape_slash=>false} + # opts # => {:max_nesting=>false, :allow_nan=>true, :script_safe=>false} attr_accessor :dump_default_options end self.dump_default_options = { :max_nesting => false, :allow_nan => true, - :escape_slash => false, + :script_safe => false, } # :call-seq: diff --git a/lib/json/pure/generator.rb b/lib/json/pure/generator.rb index 2257ee34..f417b543 100644 --- a/lib/json/pure/generator.rb +++ b/lib/json/pure/generator.rb @@ -37,25 +37,34 @@ module JSON '\\' => '\\\\', } # :nodoc: - ESCAPE_SLASH_MAP = MAP.merge( + ESCAPE_PATTERN = /[\/"\\\x0-\x1f]/n # :nodoc: + + SCRIPT_SAFE_MAP = MAP.merge( '/' => '\\/', + "\u2028".b => '\u2028', + "\u2029".b => '\u2029', ) + SCRIPT_SAFE_ESCAPE_PATTERN = Regexp.union(ESCAPE_PATTERN, "\u2028".b, "\u2029".b) + # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with # UTF16 big endian characters as \u????, and return it. - def utf8_to_json(string, escape_slash = false) # :nodoc: + def utf8_to_json(string, script_safe = false) # :nodoc: string = string.dup string.force_encoding(::Encoding::ASCII_8BIT) - map = escape_slash ? ESCAPE_SLASH_MAP : MAP - string.gsub!(/[\/"\\\x0-\x1f]/) { map[$&] || $& } + if script_safe + string.gsub!(SCRIPT_SAFE_ESCAPE_PATTERN) { SCRIPT_SAFE_MAP[$&] || $& } + else + string.gsub!(ESCAPE_PATTERN) { MAP[$&] || $& } + end string.force_encoding(::Encoding::UTF_8) string end - def utf8_to_json_ascii(string, escape_slash = false) # :nodoc: + def utf8_to_json_ascii(string, script_safe = false) # :nodoc: string = string.dup string.force_encoding(::Encoding::ASCII_8BIT) - map = escape_slash ? ESCAPE_SLASH_MAP : MAP + map = script_safe ? 
SCRIPT_SAFE_MAP : MAP string.gsub!(/[\/"\\\x0-\x1f]/n) { map[$&] || $& } string.gsub!(/( (?: @@ -115,7 +124,8 @@ def self.from_state(opts) # * *space_before*: a string that is put before a : pair delimiter (default: ''), # * *object_nl*: a string that is put at the end of a JSON object (default: ''), # * *array_nl*: a string that is put at the end of a JSON array (default: ''), - # * *escape_slash*: true if forward slash (/) should be escaped (default: false) + # * *script_safe*: true if U+2028, U+2029 and forward slash (/) should be escaped + # as to make the JSON object safe to interpolate in a script tag (default: false). # * *check_circular*: is deprecated now, use the :max_nesting option instead, # * *max_nesting*: sets the maximum level of data structure nesting in # the generated JSON, max_nesting = 0 if no maximum should be checked. @@ -130,7 +140,7 @@ def initialize(opts = {}) @array_nl = '' @allow_nan = false @ascii_only = false - @escape_slash = false + @script_safe = false @buffer_initial_length = 1024 configure opts end @@ -158,7 +168,7 @@ def initialize(opts = {}) # If this attribute is set to true, forward slashes will be escaped in # all json strings. - attr_accessor :escape_slash + attr_accessor :script_safe # :stopdoc: attr_reader :buffer_initial_length @@ -200,8 +210,8 @@ def ascii_only? end # Returns true, if forward slashes are escaped. Otherwise returns false. - def escape_slash? - @escape_slash + def script_safe? 
+ @script_safe end # Configure this State instance with the Hash _opts_, and return @@ -226,7 +236,14 @@ def configure(opts) @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) @depth = opts[:depth] || 0 @buffer_initial_length ||= opts[:buffer_initial_length] - @escape_slash = !!opts[:escape_slash] if opts.key?(:escape_slash) + + @script_safe = if opts.key?(:script_safe) + !!opts[:script_safe] + elsif opts.key?(:escape_slash) + !!opts[:escape_slash] + else + false + end if !opts.key?(:max_nesting) # defaults to 100 @max_nesting = 100 @@ -419,9 +436,9 @@ def to_json(state = nil, *args) string = encode(::Encoding::UTF_8) end if state.ascii_only? - '"' << JSON.utf8_to_json_ascii(string, state.escape_slash) << '"' + '"' << JSON.utf8_to_json_ascii(string, state.script_safe) << '"' else - '"' << JSON.utf8_to_json(string, state.escape_slash) << '"' + '"' << JSON.utf8_to_json(string, state.script_safe) << '"' end end diff --git a/tests/json_generator_test.rb b/tests/json_generator_test.rb old mode 100644 new mode 100755 index f31b6b29..2e367201 --- a/tests/json_generator_test.rb +++ b/tests/json_generator_test.rb @@ -149,7 +149,7 @@ def test_pretty_state :ascii_only => false, :buffer_initial_length => 1024, :depth => 0, - :escape_slash => false, + :script_safe => false, :indent => " ", :max_nesting => 100, :object_nl => "\n", @@ -166,7 +166,7 @@ def test_safe_state :ascii_only => false, :buffer_initial_length => 1024, :depth => 0, - :escape_slash => false, + :script_safe => false, :indent => "", :max_nesting => 100, :object_nl => "", @@ -183,7 +183,7 @@ def test_fast_state :ascii_only => false, :buffer_initial_length => 1024, :depth => 0, - :escape_slash => false, + :script_safe => false, :indent => "", :max_nesting => 0, :object_nl => "", @@ -370,6 +370,18 @@ def test_backslash # data = [ '/' ] json = '["\/"]' + assert_equal json, generate(data, :script_safe => true) + # + data = [ "\u2028\u2029" ] + json = '["\u2028\u2029"]' + assert_equal json, generate(data, 
:script_safe => true) + # + data = [ "ABC \u2028 DEF \u2029 GHI" ] + json = '["ABC \u2028 DEF \u2029 GHI"]' + assert_equal json, generate(data, :script_safe => true) + # + data = [ "/\u2028\u2029" ] + json = '["\/\u2028\u2029"]' assert_equal json, generate(data, :escape_slash => true) # data = ['"']