Skip to content

Commit

Permalink
initial asciiOnly patches
Browse files Browse the repository at this point in the history
  • Loading branch information
brianmario committed Mar 25, 2010
1 parent bf6ede1 commit 5685697
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 31 deletions.
12 changes: 12 additions & 0 deletions ext/api/yajl_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ extern "C" {
* member is only relevant when beautify is true */
const char * indentString;
} yajl_gen_config;

/** generator configuration accepted by yajl_gen_alloc3(): the beautify and
 *  indentString settings plus an asciiOnly switch. Callers may initialize
 *  this structure positionally, so the member order must not change. */
typedef struct {
/** non-zero requests beautified (pretty-printed) output */
unsigned int beautify;
/** indent string for beautified output; only relevant when beautify is
 *  set. yajl_gen_alloc3() substitutes a default when this is NULL */
const char * indentString;
/* when non-zero, tells the encoder to \uXXXX escape every character
 * > 0x7F (i.e. each multi-byte UTF-8 sequence) so the output is pure
 * ASCII; control characters < 0x20 are escaped regardless of this flag */
unsigned int asciiOnly;
} yajl_gen_config2;

/** allocate a generator handle
* \param config a pointer to a structure containing parameters which
Expand Down Expand Up @@ -118,6 +125,11 @@ extern "C" {
const yajl_gen_config * config,
const yajl_alloc_funcs * allocFuncs,
void * ctx);

/** allocate a generator handle configured through a yajl_gen_config2,
 *  which adds the asciiOnly option to the basic generator settings
 *  \param callback   print callback for the generated output; may be NULL
 *                    (NOTE(review): NULL presumably selects the internal
 *                    buffer -- confirm against yajl_gen.c)
 *  \param config     beautify / indentString / asciiOnly settings, copied
 *                    into the handle; may be NULL
 *  \param allocFuncs custom allocation routines; may be NULL
 *  \param ctx        opaque pointer; NOTE(review): presumably handed back
 *                    to callback -- confirm against yajl_gen.c
 */
yajl_gen YAJL_API yajl_gen_alloc3(yajl_print_t callback,
const yajl_gen_config2 * config,
const yajl_alloc_funcs * allocFuncs,
void * ctx);

/** free a generator handle */
YAJL_API void yajl_gen_free(yajl_gen handle);
Expand Down
122 changes: 98 additions & 24 deletions ext/yajl_encode.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,59 +37,133 @@
#include <string.h>
#include <stdio.h>

/* Write the two uppercase hexadecimal digits of c into hexBuf: the high
 * nibble goes to hexBuf[0], the low nibble to hexBuf[1]. Exactly two
 * characters are stored; no terminator is appended. */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int high = (unsigned int) c / 16u;
    const unsigned int low = (unsigned int) c % 16u;

    *hexBuf = digits[high];
    *(hexBuf + 1) = digits[low];
}

/* JSON-escape len bytes of str and append the result to buf. This is the
 * yajl_buf flavour of yajl_string_encode2(): the buffer's append routine
 * serves as the print callback and buf itself as its context. */
void
yajl_string_encode(yajl_buf buf, const unsigned char * str,
                   unsigned int len)
{
    const yajl_print_t appendToBuf = (const yajl_print_t) &yajl_buf_append;

    yajl_string_encode2(appendToBuf, buf, str, len);
}

/* utf8_limits[n - 1] is the smallest code point that genuinely needs an
 * n-byte UTF-8 sequence. A sequence of 1..6 bytes carries 7, 11, 16, 21,
 * 26 or 31 payload bits, so each bound is one past the largest value the
 * next shorter form can hold. A decoded value below the bound for its
 * sequence length is an overlong ("redundant") encoding. */
static const unsigned long utf8_limits[] = {
    0UL,         /* 1 byte  */
    1UL << 7,    /* 2 bytes */
    1UL << 11,   /* 3 bytes */
    1UL << 16,   /* 4 bytes */
    1UL << 21,   /* 5 bytes */
    1UL << 26,   /* 6 bytes */
    1UL << 31,   /* 7 bytes */
};

/* JSON-escape len bytes of str and hand the result to print.
 *
 * print  output callback, invoked as print(ctx, data, length)
 * ctx    opaque pointer passed through to print
 * str    bytes to escape
 * len    number of bytes in str
 *
 * Kept for existing callers: it forwards to yajl_string_encode3() with
 * asciiOnly switched off, so bytes >= 0x80 pass through untouched. */
void
yajl_string_encode2(const yajl_print_t print,
                    void * ctx,
                    const unsigned char * str,
                    unsigned int len)
{
    yajl_string_encode3(print, ctx, str, len, 0);
}

while (end < len) {
void
yajl_string_encode3(const yajl_print_t print,
void * ctx,
const unsigned char * str,
unsigned int len,
unsigned int asciiOnly)
{
unsigned int curPos = 0;
char curByte;

while (curPos < len) {
const char * escaped = NULL;
switch (str[end]) {
curByte = str[curPos];
switch (curByte) {
case '\r': escaped = "\\r"; break;
case '\n': escaped = "\\n"; break;
case '\\': escaped = "\\\\"; break;
/* case '/': escaped = "\\/"; break; */
case '"': escaped = "\\\""; break;
case '\f': escaped = "\\f"; break;
case '\b': escaped = "\\b"; break;
case '\t': escaped = "\\t"; break;
default:
if ((unsigned char) str[end] < 32) {
CharToHex(str[end], hexBuf + 4);
escaped = hexBuf;
default: {
int codePointChar = curByte & 0xff;
unsigned long codePoint = codePointChar;
char hexEsc[7] = "\\u0000";
const unsigned char hexChars[17] = "0123456789abcdef";

if (asciiOnly) {
unsigned int numChars;

if (!(codePoint & 0x80)) {
if (curByte < 0x20) {
hexEsc[5] = hexChars[codePoint & 0x0f];
hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
escaped = hexEsc;
}
break;
}

if (!(codePoint & 0x40)) {
// malformed UTF-8 character
// return invalidUtf8;
return;
}

if (!(codePoint & 0x20)) { numChars = 2; codePoint &= 0x1f; }
else if (!(codePoint & 0x10)) { numChars = 3; codePoint &= 0x0f; }
else if (!(codePoint & 0x08)) { numChars = 4; codePoint &= 0x07; }
else if (!(codePoint & 0x04)) { numChars = 5; codePoint &= 0x03; }
else if (!(codePoint & 0x02)) { numChars = 6; codePoint &= 0x01; }
else {
// malformed UTF-8 character
// return invalidUtf8;
return;
}
while(--numChars) {
curByte = str[++curPos];
codePointChar = curByte & 0xff;
if ((codePointChar & 0xc0) != 0x80) {
// malformed UTF-8 character
// return invalidUtf8;
return;
} else {
codePointChar &= 0x3f;
codePoint = codePoint << 6 | codePointChar;
}
}

if (codePoint < utf8_limits[numChars]) {
// redundant UTF-8 sequence
// return invalidUtf8;
return;
}

hexEsc[5] = hexChars[codePoint & 0x0f];
hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
hexEsc[3] = hexChars[(codePoint >> 8) & 0x0f];
hexEsc[2] = hexChars[(codePoint >> 12) & 0x0f];
escaped = hexEsc;
break;
} else {
// let everything pass through un-touched
// except ascii control chars
if (!(codePoint & 0x80)) {
if (curByte < 0x20) {
hexEsc[5] = hexChars[codePoint & 0x0f];
hexEsc[4] = hexChars[(codePoint >> 4) & 0x0f];
escaped = hexEsc;
}
}
break;
}
break;
}
}
if (escaped != NULL) {
print(ctx, (const char *) (str + beg), end - beg);
print(ctx, escaped, strlen(escaped));
beg = ++end;
} else {
++end;
print(ctx, &curByte, 1);
}
curPos++;
}
print(ctx, (const char *) (str + beg), end - beg);
}

static void hexToDigit(unsigned int * val, const unsigned char * hex)
Expand Down
6 changes: 6 additions & 0 deletions ext/yajl_encode.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ void yajl_string_encode2(const yajl_print_t printer,
const unsigned char * str,
unsigned int length);

/* JSON-escape length bytes of str and pass the result to printer, which
 * receives ctx as its first argument. When asciiOnly is non-zero, str is
 * treated as UTF-8 and multi-byte sequences are written as \u escapes
 * instead of being copied through; in that mode encoding stops silently
 * at the first malformed sequence. */
void yajl_string_encode3(yajl_print_t printer,
void * ctx,
const unsigned char * str,
unsigned int length,
unsigned int asciiOnly);

/* JSON-escape length bytes of str and append the result to buf; a
 * convenience wrapper over yajl_string_encode2() that prints into a
 * yajl_buf. */
void yajl_string_encode(yajl_buf buf, const unsigned char * str,
unsigned int length);

Expand Down
12 changes: 8 additions & 4 deletions ext/yajl_ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -527,10 +527,10 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
*/
static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
yajl_encoder_wrapper * wrapper;
yajl_gen_config cfg;
yajl_gen_config2 cfg;
VALUE opts, obj, indent;
const char * indentString = " ";
int beautify = 0;
int beautify = 0, asciiOnly = 0;

/* Scan off config vars */
if (rb_scan_args(argc, argv, "01", &opts) == 1) {
Expand All @@ -547,11 +547,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
indentString = RSTRING_PTR(indent);
}
}
if (rb_hash_aref(opts, sym_ascii_only) == Qtrue) {
asciiOnly = 1;
}
}
cfg = (yajl_gen_config){beautify, indentString};
cfg = (yajl_gen_config2){beautify, indentString, asciiOnly};

obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper);
wrapper->encoder = yajl_gen_alloc(&cfg, NULL);
wrapper->encoder = yajl_gen_alloc3(NULL, &cfg, NULL, NULL);
wrapper->on_progress_callback = Qnil;
if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) {
wrapper->terminator = rb_hash_aref(opts, sym_terminator);
Expand Down Expand Up @@ -897,6 +900,7 @@ void Init_yajl_ext() {
sym_indent = ID2SYM(rb_intern("indent"));
sym_terminator = ID2SYM(rb_intern("terminator"));
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
sym_ascii_only = ID2SYM(rb_intern("ascii_only"));

#ifdef HAVE_RUBY_ENCODING_H
utf8Encoding = rb_enc_find_index("UTF-8");
Expand Down
5 changes: 3 additions & 2 deletions ext/yajl_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ int utf8Encoding;
static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder;
static ID intern_io_read, intern_call, intern_keys, intern_to_s,
intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys;
static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent,
sym_terminator, sym_symbolize_keys, sym_ascii_only;

#define GetParser(obj, sval) (sval = (yajl_parser_wrapper*)DATA_PTR(obj));
#define GetEncoder(obj, sval) (sval = (yajl_encoder_wrapper*)DATA_PTR(obj));
Expand Down Expand Up @@ -125,4 +126,4 @@ static VALUE rb_yajl_json_ext_false_to_json(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_json_ext_nil_to_json(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_encoder_enable_json_gem_ext(VALUE klass);

void Init_yajl_ext();
void Init_yajl_ext();
11 changes: 11 additions & 0 deletions ext/yajl_gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ struct yajl_gen_t
{
unsigned int depth;
unsigned int pretty;
unsigned int asciiOnly;
const char * indentString;
yajl_gen_state state[YAJL_MAX_DEPTH];
yajl_print_t print;
Expand All @@ -74,6 +75,15 @@ yajl_gen_alloc2(const yajl_print_t callback,
const yajl_gen_config * config,
const yajl_alloc_funcs * afs,
void * ctx)
{
return NULL;
}

yajl_gen
yajl_gen_alloc3(yajl_print_t callback,
const yajl_gen_config2 * config,
const yajl_alloc_funcs * afs,
void * ctx)
{
yajl_gen g = NULL;
yajl_alloc_funcs afsBuffer;
Expand All @@ -97,6 +107,7 @@ yajl_gen_alloc2(const yajl_print_t callback,
if (config) {
g->pretty = config->beautify;
g->indentString = config->indentString ? config->indentString : " ";
g->asciiOnly = config->asciiOnly;
}

if (callback) {
Expand Down
2 changes: 1 addition & 1 deletion spec/json_gem_compatibility/compatibility_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class Dummy; end
'""'.should eql(''.to_json)
'"\\b"'.should eql("\b".to_json)
'"\u0001"'.should eql(0x1.chr.to_json)
'"\u001F"'.should eql(0x1f.chr.to_json)
'"\u001f"'.should eql(0x1f.chr.to_json)
'" "'.should eql(' '.to_json)
"\"#{0x7f.chr}\"".should eql(0x7f.chr.to_json)
utf8 = [ "© ≠ €! \01" ]
Expand Down

0 comments on commit 5685697

Please sign in to comment.