Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Add a 'process_nested' option to fire a callback on nested objects #50

Open
wants to merge 2 commits into from

3 participants

@hoxworth

A limitation I have run into in the past in using the yajl-ruby Gem for parsing large JSON documents is the case where I need to parse a very large array of objects but would like to have a callback fired on each object as opposed to the on_parse_complete callback at the end. I had been using a hacked-together patch personally for a while, but figured it made sense to push this to the main project.

This patch adds a process_nested option to the Yajl::Parser.new call that will call an on_parse_nested callback (if set) whenever an object / array is parsed from the JSON, passing along the depth at which the object was parsed. An additional option to the Yajl::Parser.new call, nested_depth, may be set to declare the maximum depth at which the callback will be fired; 0, the default, will fire the callback on every object.

The on_parse_nested Proc receives two arguments, obj and depth. obj is the same object received by the on_parse_complete callback, and depth is the depth at which the nested object was parsed.

Let me know if you have any questions, or if you feel this doesn't make sense for the yajl-ruby project.

@brianmario
Owner

We probably don't need the process_nested boolean here if we can just infer that they want to use the feature based on if the nested_depth key exists in the options hash?

@avsej avsej referenced this pull request from a commit in avsej/yajl-ruby
@avsej avsej Replace 'process_nested' option with test of 'on_parse_nested' (issue #…
…50)

It uses the presence of the 'on_parse_nested' callback instead of the
'process_nested' option. This patch also removes trailing spaces introduced
by hoxworth
8ca9f3e
@avsej

Hi all, @brianmario maybe my patch will help get this feature accepted?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Mar 14, 2011
  1. @hoxworth

    I need to finally commit this to Github so I can access this elsewher…

    hoxworth authored
    …e and finally fix / implement this feature.
  2. @hoxworth
This page is out of date. Refresh to see the latest.
View
59 ext/yajl/yajl_ext.c
@@ -36,7 +36,7 @@
inline void yajl_check_and_fire_callback(void * ctx) {
yajl_parser_wrapper * wrapper;
GetParser((VALUE)ctx, wrapper);
-
+
/* No need to do any of this if the callback isn't even setup */
if (wrapper->parse_complete_callback != Qnil) {
int len = RARRAY_LEN(wrapper->builderStack);
@@ -314,7 +314,13 @@ static int yajl_found_end_hash(void * ctx) {
GetParser((VALUE)ctx, wrapper);
wrapper->nestedHashLevel--;
if (RARRAY_LEN(wrapper->builderStack) > 1) {
- rb_ary_pop(wrapper->builderStack);
+ VALUE popped = rb_ary_pop(wrapper->builderStack);
+
+ if (wrapper->processNestedCallback && (wrapper->nestedArrayLevel + wrapper->nestedHashLevel <= wrapper->nestedCallbackDepth || wrapper->nestedCallbackDepth == 0)) {
+ if ( wrapper->parse_nested_callback != Qnil) {
+ rb_funcall(wrapper->parse_nested_callback, intern_call, 2, popped, INT2NUM(wrapper->nestedArrayLevel + wrapper->nestedHashLevel));
+ }
+ }
}
yajl_check_and_fire_callback(ctx);
return 1;
@@ -333,7 +339,13 @@ static int yajl_found_end_array(void * ctx) {
GetParser((VALUE)ctx, wrapper);
wrapper->nestedArrayLevel--;
if (RARRAY_LEN(wrapper->builderStack) > 1) {
- rb_ary_pop(wrapper->builderStack);
+ VALUE popped = rb_ary_pop(wrapper->builderStack);
+
+ if (wrapper->processNestedCallback && (wrapper->nestedArrayLevel + wrapper->nestedHashLevel <= wrapper->nestedCallbackDepth || wrapper->nestedCallbackDepth == 0)) {
+ if ( wrapper->parse_nested_callback != Qnil) {
+ rb_funcall(wrapper->parse_nested_callback, intern_call, 2, popped, INT2NUM(wrapper->nestedArrayLevel + wrapper->nestedHashLevel));
+ }
+ }
}
yajl_check_and_fire_callback(ctx);
return 1;
@@ -360,12 +372,17 @@ static int yajl_found_end_array(void * ctx) {
* :allow_comments will turn on/off the check for comments inside the JSON stream, defaults to true.
*
* :check_utf8 will validate UTF8 characters found in the JSON stream, defaults to true.
+ *
+ * :process_nested will attempt to call the nested object callback on every nested object parsed, defaults to false.
+ *
+ * :nested_depth sets the maximum depth of objects that will fire the nested object callback when parsed.
+ * Defaults to 0, which is infinite depth.
*/
static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
yajl_parser_wrapper * wrapper;
yajl_parser_config cfg;
VALUE opts, obj;
- int allowComments = 1, checkUTF8 = 1, symbolizeKeys = 0;
+ int allowComments = 1, checkUTF8 = 1, symbolizeKeys = 0, processNestedCallback = 0, nestedCallbackDepth = 0;
/* Scan off config vars */
if (rb_scan_args(argc, argv, "01", &opts) == 1) {
@@ -380,6 +397,12 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
if (rb_hash_aref(opts, sym_symbolize_keys) == Qtrue) {
symbolizeKeys = 1;
}
+ if (rb_hash_aref(opts, sym_process_nested_callback) == Qtrue) {
+ processNestedCallback = 1;
+ }
+ if (rb_hash_aref(opts, sym_nested_callback_depth) != Qnil) {
+ nestedCallbackDepth = NUM2INT(rb_funcall(rb_hash_aref(opts, sym_nested_callback_depth), intern_to_i, 0));
+ }
}
cfg = (yajl_parser_config){allowComments, checkUTF8};
@@ -389,8 +412,11 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
wrapper->nestedHashLevel = 0;
wrapper->objectsFound = 0;
wrapper->symbolizeKeys = symbolizeKeys;
+ wrapper->processNestedCallback = processNestedCallback;
+ wrapper->nestedCallbackDepth = nestedCallbackDepth;
wrapper->builderStack = rb_ary_new();
wrapper->parse_complete_callback = Qnil;
+ wrapper->parse_nested_callback = Qnil;
rb_obj_call_init(obj, 0, 0);
return obj;
}
@@ -497,12 +523,12 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
rb_raise(cParseError, "Can't parse a nil string.");
}
- if (wrapper->parse_complete_callback != Qnil) {
+ if (wrapper->parse_complete_callback != Qnil || wrapper->parse_nested_callback) {
const char * cptr = RSTRING_PTR(chunk);
len = RSTRING_LEN(chunk);
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
} else {
- rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
+ rb_raise(cParseError, "The on_parse_complete and on_parse_nested callbacks aren't setup, parsing useless.");
}
return Qnil;
@@ -525,6 +551,23 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
}
/*
+ * Document-method: on_parse_nested=
+ *
+ * call-seq: on_parse_nested = Proc.new { |obj,depth| ... }
+ *
+ * This callback setter allows you to pass a Proc/lambda or any other object that responds to #call. The callback is only
+ * fired when the +process_nested+ option is set to +true+.
+ *
+ * It will pass two parameters, the ruby object built from the last parsed JSON object and the nested depth of the object
+ */
+static VALUE rb_yajl_parser_set_nested_cb(VALUE self, VALUE callback) {
+ yajl_parser_wrapper * wrapper;
+ GetParser(self, wrapper);
+ wrapper->parse_nested_callback = callback;
+ return Qnil;
+}
+
+/*
* Document-class: Yajl::Encoder
*
* This class contains methods for encoding a Ruby object into JSON, streaming it's output into an IO object.
@@ -866,6 +909,7 @@ void Init_yajl() {
rb_define_method(cParser, "parse_chunk", rb_yajl_parser_parse_chunk, 1);
rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1);
rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1);
+ rb_define_method(cParser, "on_parse_nested=", rb_yajl_parser_set_nested_cb, 1);
cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject);
rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1);
@@ -879,6 +923,7 @@ void Init_yajl() {
intern_call = rb_intern("call");
intern_keys = rb_intern("keys");
intern_to_s = rb_intern("to_s");
+ intern_to_i = rb_intern("to_i");
intern_to_json = rb_intern("to_json");
intern_to_sym = rb_intern("to_sym");
intern_has_key = rb_intern("has_key?");
@@ -891,6 +936,8 @@ void Init_yajl() {
sym_html_safe = ID2SYM(rb_intern("html_safe"));
sym_terminator = ID2SYM(rb_intern("terminator"));
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
+ sym_process_nested_callback = ID2SYM(rb_intern("process_nested"));
+ sym_nested_callback_depth = ID2SYM(rb_intern("nested_depth"));
#ifdef HAVE_RUBY_ENCODING_H
utf8Encoding = rb_utf8_encoding();
View
11 ext/yajl/yajl_ext.h
@@ -49,8 +49,11 @@ static rb_encoding *utf8Encoding;
static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder;
static ID intern_io_read, intern_call, intern_keys, intern_to_s,
- intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
-static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_html_safe;
+ intern_to_json, intern_has_key, intern_to_sym, intern_as_json,
+ intern_to_i;
+static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent,
+ sym_terminator, sym_symbolize_keys, sym_html_safe,
+ sym_process_nested_callback, sym_nested_callback_depth;
#define GetParser(obj, sval) (sval = (yajl_parser_wrapper*)DATA_PTR(obj));
#define GetEncoder(obj, sval) (sval = (yajl_encoder_wrapper*)DATA_PTR(obj));
@@ -87,10 +90,13 @@ static yajl_callbacks callbacks = {
typedef struct {
VALUE builderStack;
VALUE parse_complete_callback;
+ VALUE parse_nested_callback;
int nestedArrayLevel;
int nestedHashLevel;
int objectsFound;
int symbolizeKeys;
+ int processNestedCallback;
+ int nestedCallbackDepth;
yajl_handle parser;
} yajl_parser_wrapper;
@@ -106,6 +112,7 @@ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback);
+static VALUE rb_yajl_parser_set_nested_cb(VALUE self, VALUE callback);
static void yajl_parser_wrapper_free(void * wrapper);
static void yajl_parser_wrapper_mark(void * wrapper);
View
48 spec/parsing/nested_spec.rb
@@ -0,0 +1,48 @@
+# encoding: UTF-8
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
+
+describe "Nested parsing" do
+ before(:each) do
+ @nested_callback = lambda { |hash,depth|
+ # no-op
+ }
+ end
+
+ it "should parse a single nested hash" do
+ @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 1)
+ @parser.on_parse_nested = @nested_callback
+
+ @nested_callback.should_receive(:call).with({"abc" => 123},1)
+ @parser << '[{"abc": 123}]'
+ end
+
+
+ it "should parse a two-layer array" do
+ @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 2)
+ @parser.on_parse_nested = @nested_callback
+
+ @nested_callback.should_receive(:call).with({"abc" => 123},2)
+ @nested_callback.should_receive(:call).with([{"abc" => 123}],1)
+ @parser << '[[{"abc": 123}]]'
+ end
+
+
+ it "should parse a single-layer array multiple times" do
+ @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 1)
+ @parser.on_parse_nested = @nested_callback
+
+ @nested_callback.should_receive(:call).with({"abc" => 123},1)
+ @nested_callback.should_receive(:call).with({"def" => 456},1)
+ @parser << '[{"abc": 123},{"def": 456}]'
+ end
+
+
+ it "should handle a nested depth of 0" do
+ @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 0)
+ @parser.on_parse_nested = @nested_callback
+
+ @nested_callback.should_receive(:call).with({"abc" => 123},2)
+ @nested_callback.should_receive(:call).with([{"abc" => 123}],1)
+ @parser << '[[{"abc": 123}]]'
+ end
+end
Something went wrong with that request. Please try again.