Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

initial asciiOnly patches

  • Loading branch information...
commit 5685697dfb94083f497319b47a64990b14feab13 1 parent bf6ede1
@brianmario authored
View
12 ext/api/yajl_gen.h
@@ -83,6 +83,13 @@ extern "C" {
* member is only relevant when beautify is true */
const char * indentString;
} yajl_gen_config;
+
+ typedef struct {
+ unsigned int beautify;
+ const char * indentString;
+ /* tells the encoder to \uXXXX escape all characters < 0x20 and > 0x7F */
+ unsigned int asciiOnly;
+ } yajl_gen_config2;
/** allocate a generator handle
* \param config a pointer to a structure containing parameters which
@@ -118,6 +125,11 @@ extern "C" {
const yajl_gen_config * config,
const yajl_alloc_funcs * allocFuncs,
void * ctx);
+
/* Allocate a generator handle from a yajl_gen_config2, which carries the
 * asciiOnly flag. The remaining parameters match yajl_gen_alloc2:
 *   callback   - print callback; the Ruby binding in this commit passes NULL
 *   config     - configuration to copy from; checked for NULL before use
 *   allocFuncs - custom allocation routines; the Ruby binding passes NULL
 *   ctx        - opaque pointer; presumably handed to callback - confirm */
+ yajl_gen YAJL_API yajl_gen_alloc3(yajl_print_t callback,
+ const yajl_gen_config2 * config,
+ const yajl_alloc_funcs * allocFuncs,
+ void * ctx);
/** free a generator handle */
YAJL_API void yajl_gen_free(yajl_gen handle);
View
122 ext/yajl_encode.c
@@ -37,13 +37,6 @@
#include <string.h>
#include <stdio.h>
-static void CharToHex(unsigned char c, char * hexBuf)
-{
- const char * hexchar = "0123456789ABCDEF";
- hexBuf[0] = hexchar[c >> 4];
- hexBuf[1] = hexchar[c & 0x0F];
-}
-
void
yajl_string_encode(yajl_buf buf, const unsigned char * str,
unsigned int len)
@@ -51,45 +44,126 @@ yajl_string_encode(yajl_buf buf, const unsigned char * str,
yajl_string_encode2((const yajl_print_t) &yajl_buf_append, buf, str, len);
}
/* Lower bounds used by yajl_string_encode3 to reject redundant (overlong)
 * UTF-8 sequences. Going by the inline labels, entry i is the smallest
 * code point that needs an (i + 1)-byte sequence, so the entry for an
 * N-byte sequence lives at index N - 1.
 * NOTE(review): the only visible lookup indexes this table with a counter
 * that has already been decremented to 0 - confirm the intended index. */
+static const unsigned long utf8_limits[] = {
+ 0x0, /* 1 */
+ 0x80, /* 2 */
+ 0x800, /* 3 */
+ 0x10000, /* 4 */
+ 0x200000, /* 5 */
+ 0x4000000, /* 6 */
+ 0x80000000, /* 7 */
+};
+
/* JSON-escape len bytes of str and emit them through print(ctx, ...).
 * After this commit the function is a thin wrapper: the old inline
 * escaping loop is removed and the work is delegated to
 * yajl_string_encode3 with asciiOnly disabled. */
void
yajl_string_encode2(const yajl_print_t print,
void * ctx,
const unsigned char * str,
unsigned int len)
{
- unsigned int beg = 0;
- unsigned int end = 0;
- char hexBuf[7];
- hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
- hexBuf[6] = 0;
+ yajl_string_encode3(print, ctx, str, len, 0); /* 0 = asciiOnly off */
+}
- while (end < len) {
/* JSON-escape len bytes of str and emit the result through print(ctx, ...).
 *
 * Always applied: the short escapes for CR, LF, backslash, double quote,
 * form feed, backspace and tab, and a \u00XX escape for any other byte
 * below 0x20.
 *
 * asciiOnly == 0: every other byte, including bytes with the high bit set,
 * is printed unchanged, one byte per print call.
 *
 * asciiOnly != 0: a byte with the high bit set is treated as the lead byte
 * of a UTF-8 sequence, the sequence is decoded to a code point, and a
 * single \uXXXX escape is printed in its place.
 *
 * On malformed input the function returns early. It is void, so the caller
 * is not told, and output printed before the bad byte is not rolled back. */
+void
+yajl_string_encode3(const yajl_print_t print,
+ void * ctx,
+ const unsigned char * str,
+ unsigned int len,
+ unsigned int asciiOnly)
+{
+ unsigned int curPos = 0;
+ char curByte; /* plain char: the code masks with 0xff before testing bits */
+
+ while (curPos < len) {
const char * escaped = NULL;
- switch (str[end]) {
+ curByte = str[curPos];
+ switch (curByte) {
case '\r': escaped = "\\r"; break;
case '\n': escaped = "\\n"; break;
case '\\': escaped = "\\\\"; break;
- /* case '/': escaped = "\\/"; break; */
case '"': escaped = "\\\""; break;
case '\f': escaped = "\\f"; break;
case '\b': escaped = "\\b"; break;
case '\t': escaped = "\\t"; break;
- default:
- if ((unsigned char) str[end] < 32) {
- CharToHex(str[end], hexBuf + 4);
- escaped = hexBuf;
+ default: {
+ int codePointChar = curByte & 0xff; /* byte value in 0..255 regardless of char signedness */
+ unsigned long codePoint = codePointChar;
+ char hexEsc[7] = "\\u0000"; /* escape template; hex digits are patched in below */
+ const unsigned char hexChars[17] = "0123456789abcdef"; /* lower-case digits (the removed CharToHex used upper-case) */
+
+ if (asciiOnly) {
+ unsigned int numChars; /* total bytes in the UTF-8 sequence, lead byte included */
+
+ if (!(codePoint & 0x80)) { /* high bit clear: single-byte ASCII */
+ if (curByte < 0x20) { /* control character without a short escape */
+ hexEsc[5] = hexChars[codePoint & 0x0f];
+ hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
+ escaped = hexEsc;
+ }
+ break;
+ }
+
+ if (!(codePoint & 0x40)) { /* 10xxxxxx: a continuation byte where a lead byte is expected */
+ // malformed UTF-8 character
+ // return invalidUtf8;
+ return;
+ }
+
+ /* the count of leading 1 bits in the lead byte gives the sequence
+  * length; the mask keeps only the payload bits of the lead byte */
+ if (!(codePoint & 0x20)) { numChars = 2; codePoint &= 0x1f; }
+ else if (!(codePoint & 0x10)) { numChars = 3; codePoint &= 0x0f; }
+ else if (!(codePoint & 0x08)) { numChars = 4; codePoint &= 0x07; }
+ else if (!(codePoint & 0x04)) { numChars = 5; codePoint &= 0x03; }
+ else if (!(codePoint & 0x02)) { numChars = 6; codePoint &= 0x01; }
+ else {
+ // malformed UTF-8 character
+ // return invalidUtf8;
+ return;
+ }
+ /* consume the numChars - 1 continuation bytes, 6 payload bits each.
+  * NOTE(review): curPos is advanced and str is read without comparing
+  * against len, so a sequence truncated at the end of the input reads
+  * past the buffer. This loop also leaves numChars at 0 on exit. */
+ while(--numChars) {
+ curByte = str[++curPos];
+ codePointChar = curByte & 0xff;
+ if ((codePointChar & 0xc0) != 0x80) { /* continuation bytes must be 10xxxxxx */
+ // malformed UTF-8 character
+ // return invalidUtf8;
+ return;
+ } else {
+ codePointChar &= 0x3f;
+ codePoint = codePoint << 6 | codePointChar;
+ }
+ }
+
+ /* NOTE(review): numChars is 0 here, so this compares against
+  * utf8_limits[0], which is 0x0; an unsigned value is never below 0,
+  * so the redundant-sequence check cannot trigger as written. */
+ if (codePoint < utf8_limits[numChars]) {
+ // redundant UTF-8 sequence
+ // return invalidUtf8;
+ return;
+ }
+
+ /* NOTE(review): only the low 16 bits of codePoint are written, so
+  * code points above 0xFFFF are truncated; JSON expresses those as a
+  * surrogate pair of two \uXXXX escapes - confirm intended handling. */
+ hexEsc[5] = hexChars[codePoint & 0x0f];
+ hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
+ hexEsc[3] = hexChars[(codePoint >> 8) & 0x0f];
+ hexEsc[2] = hexChars[(codePoint >> 12) & 0x0f];
+ escaped = hexEsc;
+ break;
+ } else {
+ // let everything pass through un-touched
+ // except ascii control chars
+ if (!(codePoint & 0x80)) {
+ if (curByte < 0x20) {
+ hexEsc[5] = hexChars[codePoint & 0x0f];
+ hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
+ escaped = hexEsc;
+ }
+ }
+ break;
}
- break;
+ }
}
if (escaped != NULL) {
- print(ctx, (const char *) (str + beg), end - beg);
print(ctx, escaped, strlen(escaped));
- beg = ++end;
} else {
- ++end;
+ print(ctx, &curByte, 1); /* one print call per literal byte; the removed code flushed whole runs */
}
+ curPos++; /* after a multi-byte sequence curPos sits on its last byte, so this steps past it */
}
- print(ctx, (const char *) (str + beg), end - beg);
}
static void hexToDigit(unsigned int * val, const unsigned char * hex)
View
6 ext/yajl_encode.h
@@ -41,6 +41,12 @@ void yajl_string_encode2(const yajl_print_t printer,
const unsigned char * str,
unsigned int length);
/* JSON-escape length bytes of str and emit them through printer(ctx, ...).
 * Same parameters as yajl_string_encode2 plus asciiOnly: when non-zero,
 * bytes with the high bit set are decoded as UTF-8 and written as \uXXXX
 * escapes instead of being passed through. */
+void yajl_string_encode3(yajl_print_t printer,
+ void * ctx,
+ const unsigned char * str,
+ unsigned int length,
+ unsigned int asciiOnly);
+
void yajl_string_encode(yajl_buf buf, const unsigned char * str,
unsigned int length);
View
12 ext/yajl_ext.c
@@ -527,10 +527,10 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
*/
static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
yajl_encoder_wrapper * wrapper;
- yajl_gen_config cfg;
+ yajl_gen_config2 cfg;
VALUE opts, obj, indent;
const char * indentString = " ";
- int beautify = 0;
+ int beautify = 0, asciiOnly = 0;
/* Scan off config vars */
if (rb_scan_args(argc, argv, "01", &opts) == 1) {
@@ -547,11 +547,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
indentString = RSTRING_PTR(indent);
}
}
+ if (rb_hash_aref(opts, sym_ascii_only) == Qtrue) {
+ asciiOnly = 1;
+ }
}
- cfg = (yajl_gen_config){beautify, indentString};
+ cfg = (yajl_gen_config2){beautify, indentString, asciiOnly};
obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper);
- wrapper->encoder = yajl_gen_alloc(&cfg, NULL);
+ wrapper->encoder = yajl_gen_alloc3(NULL, &cfg, NULL, NULL);
wrapper->on_progress_callback = Qnil;
if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) {
wrapper->terminator = rb_hash_aref(opts, sym_terminator);
@@ -897,6 +900,7 @@ void Init_yajl_ext() {
sym_indent = ID2SYM(rb_intern("indent"));
sym_terminator = ID2SYM(rb_intern("terminator"));
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
+ sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
#ifdef HAVE_RUBY_ENCODING_H
utf8Encoding = rb_enc_find_index("UTF-8");
View
5 ext/yajl_ext.h
@@ -50,7 +50,8 @@ int utf8Encoding;
static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder;
static ID intern_io_read, intern_call, intern_keys, intern_to_s,
intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
-static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys;
/* Cached option-key symbols, filled in by Init_yajl_ext; sym_ascii_only is
 * the key looked up in the encoder options hash.
 * NOTE(review): these are declared as ID but assigned from ID2SYM(...) and
 * passed to rb_hash_aref as hash keys - confirm VALUE is not the intended type. */
+static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent,
+ sym_terminator, sym_symbolize_keys, sym_ascii_only;
#define GetParser(obj, sval) (sval = (yajl_parser_wrapper*)DATA_PTR(obj));
#define GetEncoder(obj, sval) (sval = (yajl_encoder_wrapper*)DATA_PTR(obj));
@@ -125,4 +126,4 @@ static VALUE rb_yajl_json_ext_false_to_json(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_json_ext_nil_to_json(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_encoder_enable_json_gem_ext(VALUE klass);
-void Init_yajl_ext();
+void Init_yajl_ext();
View
11 ext/yajl_gen.c
@@ -54,6 +54,7 @@ struct yajl_gen_t
{
unsigned int depth;
unsigned int pretty;
+ unsigned int asciiOnly;
const char * indentString;
yajl_gen_state state[YAJL_MAX_DEPTH];
yajl_print_t print;
@@ -75,6 +76,15 @@ yajl_gen_alloc2(const yajl_print_t callback,
const yajl_alloc_funcs * afs,
void * ctx)
{
+ return NULL;
+}
+
+yajl_gen
+yajl_gen_alloc3(yajl_print_t callback,
+ const yajl_gen_config2 * config,
+ const yajl_alloc_funcs * afs,
+ void * ctx)
+{
yajl_gen g = NULL;
yajl_alloc_funcs afsBuffer;
@@ -97,6 +107,7 @@ yajl_gen_alloc2(const yajl_print_t callback,
if (config) {
g->pretty = config->beautify;
g->indentString = config->indentString ? config->indentString : " ";
+ g->asciiOnly = config->asciiOnly;
}
if (callback) {
View
2  spec/json_gem_compatibility/compatibility_spec.rb
@@ -84,7 +84,7 @@ class Dummy; end
'""'.should eql(''.to_json)
'"\\b"'.should eql("\b".to_json)
'"\u0001"'.should eql(0x1.chr.to_json)
- '"\u001F"'.should eql(0x1f.chr.to_json)
+ '"\u001f"'.should eql(0x1f.chr.to_json)
'" "'.should eql(' '.to_json)
"\"#{0x7f.chr}\"".should eql(0x7f.chr.to_json)
utf8 = [ "© ≠ €! \01" ]
Please sign in to comment.
Something went wrong with that request. Please try again.