Skip to content
This repository

Add a 'process_nested' option to fire a callback on nested objects #50

Open
wants to merge 2 commits into from

3 participants

Kenny Hoxworth Brian Lopez Sergey Avseyev
Kenny Hoxworth

A limitation I have run into in the past in using the yajl-ruby Gem for parsing large JSON documents is the case where I need to parse a very large array of objects but would like to have a callback fired on each object as opposed to the on_parse_complete callback at the end. I had been using a hacked-together patch personally for a while, but figured it made sense to push this to the main project.

This patch adds a process_nested option to the Yajl::Parser.new call that will call an on_parse_nested callback (if set) whenever an object or array is parsed from the JSON, passing along the depth at which the object was parsed. An additional option to the Yajl::Parser.new call, nested_depth, may be passed to declare the maximum depth at which the callback will be fired; 0, the default, will fire the callback on every object.

The on_parse_nested Proc receives two arguments, obj and depth. obj is the same object received by the on_parse_complete callback, and depth is the depth at which the nested object was parsed.

Let me know if you have any questions, or if you feel this doesn't make sense for the yajl-ruby project.

Brian Lopez
Owner

We probably don't need the process_nested boolean here if we can just infer that they want to use the feature based on whether the nested_depth key exists in the options hash?

Sergey Avseyev avsej referenced this pull request from a commit in avsej/yajl-ruby December 08, 2011
Sergey Avseyev Replace 'process_nested' option with test of 'on_parse_nested' (issue #…
…50)

It uses the availability of the 'on_parse_nested' callback instead of the
'process_nested' option. This patch also removes trailing spaces introduced
by hoxworth
8ca9f3e
Sergey Avseyev

Hi all, @brianmario, maybe my patch will help get this feature accepted?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
59  ext/yajl/yajl_ext.c
@@ -36,7 +36,7 @@
36 36
 inline void yajl_check_and_fire_callback(void * ctx) {
37 37
     yajl_parser_wrapper * wrapper;
38 38
     GetParser((VALUE)ctx, wrapper);
39  
-
  39
+    
40 40
     /* No need to do any of this if the callback isn't even setup */
41 41
     if (wrapper->parse_complete_callback != Qnil) {
42 42
         int len = RARRAY_LEN(wrapper->builderStack);
@@ -314,7 +314,13 @@ static int yajl_found_end_hash(void * ctx) {
314 314
     GetParser((VALUE)ctx, wrapper);
315 315
     wrapper->nestedHashLevel--;
316 316
     if (RARRAY_LEN(wrapper->builderStack) > 1) {
317  
-        rb_ary_pop(wrapper->builderStack);
  317
+        VALUE popped = rb_ary_pop(wrapper->builderStack);
  318
+
  319
+        if (wrapper->processNestedCallback && (wrapper->nestedArrayLevel + wrapper->nestedHashLevel <= wrapper->nestedCallbackDepth || wrapper->nestedCallbackDepth == 0)) {
  320
+            if ( wrapper->parse_nested_callback != Qnil) {
  321
+                rb_funcall(wrapper->parse_nested_callback, intern_call, 2, popped, INT2NUM(wrapper->nestedArrayLevel + wrapper->nestedHashLevel));
  322
+            } 
  323
+        }
318 324
     }
319 325
     yajl_check_and_fire_callback(ctx);
320 326
     return 1;
@@ -333,7 +339,13 @@ static int yajl_found_end_array(void * ctx) {
333 339
     GetParser((VALUE)ctx, wrapper);
334 340
     wrapper->nestedArrayLevel--;
335 341
     if (RARRAY_LEN(wrapper->builderStack) > 1) {
336  
-        rb_ary_pop(wrapper->builderStack);
  342
+        VALUE popped = rb_ary_pop(wrapper->builderStack);
  343
+        
  344
+        if (wrapper->processNestedCallback && (wrapper->nestedArrayLevel + wrapper->nestedHashLevel <= wrapper->nestedCallbackDepth || wrapper->nestedCallbackDepth == 0)) {
  345
+            if ( wrapper->parse_nested_callback != Qnil) {
  346
+                rb_funcall(wrapper->parse_nested_callback, intern_call, 2, popped, INT2NUM(wrapper->nestedArrayLevel + wrapper->nestedHashLevel));
  347
+            }
  348
+        }
337 349
     }
338 350
     yajl_check_and_fire_callback(ctx);
339 351
     return 1;
@@ -360,12 +372,17 @@ static int yajl_found_end_array(void * ctx) {
360 372
  * :allow_comments will turn on/off the check for comments inside the JSON stream, defaults to true.
361 373
  *
362 374
  * :check_utf8 will validate UTF8 characters found in the JSON stream, defaults to true.
  375
+ *
  376
+ * :process_nested will attempt to call the nested object callback on every nested object parsed, defaults to false.
  377
+ *
  378
+ * :nested_depth sets the maximum depth of objects that will fire the nested object callback when parsed.
  379
+ * Defaults to 0, which is infinite depth.
363 380
  */
364 381
 static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
365 382
     yajl_parser_wrapper * wrapper;
366 383
     yajl_parser_config cfg;
367 384
     VALUE opts, obj;
368  
-    int allowComments = 1, checkUTF8 = 1, symbolizeKeys = 0;
  385
+    int allowComments = 1, checkUTF8 = 1, symbolizeKeys = 0, processNestedCallback = 0, nestedCallbackDepth = 0;
369 386
 
370 387
     /* Scan off config vars */
371 388
     if (rb_scan_args(argc, argv, "01", &opts) == 1) {
@@ -380,6 +397,12 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
380 397
         if (rb_hash_aref(opts, sym_symbolize_keys) == Qtrue) {
381 398
             symbolizeKeys = 1;
382 399
         }
  400
+        if (rb_hash_aref(opts, sym_process_nested_callback) == Qtrue) {
  401
+            processNestedCallback = 1;
  402
+        }
  403
+        if (rb_hash_aref(opts, sym_nested_callback_depth) != Qnil) {
  404
+            nestedCallbackDepth = NUM2INT(rb_funcall(rb_hash_aref(opts, sym_nested_callback_depth), intern_to_i, 0));
  405
+        }
383 406
     }
384 407
     cfg = (yajl_parser_config){allowComments, checkUTF8};
385 408
 
@@ -389,8 +412,11 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
389 412
     wrapper->nestedHashLevel = 0;
390 413
     wrapper->objectsFound = 0;
391 414
     wrapper->symbolizeKeys = symbolizeKeys;
  415
+    wrapper->processNestedCallback = processNestedCallback;
  416
+    wrapper->nestedCallbackDepth = nestedCallbackDepth;
392 417
     wrapper->builderStack = rb_ary_new();
393 418
     wrapper->parse_complete_callback = Qnil;
  419
+    wrapper->parse_nested_callback = Qnil;
394 420
     rb_obj_call_init(obj, 0, 0);
395 421
     return obj;
396 422
 }
@@ -497,12 +523,12 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
497 523
         rb_raise(cParseError, "Can't parse a nil string.");
498 524
     }
499 525
 
500  
-    if (wrapper->parse_complete_callback != Qnil) {
  526
+    if (wrapper->parse_complete_callback != Qnil || wrapper->parse_nested_callback) {
501 527
         const char * cptr = RSTRING_PTR(chunk);
502 528
         len = RSTRING_LEN(chunk);
503 529
         yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
504 530
     } else {
505  
-        rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
  531
+        rb_raise(cParseError, "The on_parse_complete and on_parse_nested callbacks aren't setup, parsing useless.");
506 532
     }
507 533
 
508 534
     return Qnil;
@@ -525,6 +551,23 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
525 551
 }
526 552
 
527 553
 /*
  554
+ * Document-method: on_parse_nested=
  555
+ *
  556
+ * call-seq: on_parse_nested = Proc.new { |obj,depth| ... }
  557
+ *
  558
+ * This callback setter allows you to pass a Proc/lambda or any other object that responds to #call. The callback is only 
  559
+ * fired when the +process_nested+ option is set to +true+.
  560
+ *
  561
+ * It will pass two parameters, the ruby object built from the last parsed JSON object and the nested depth of the object
  562
+ */
  563
+static VALUE rb_yajl_parser_set_nested_cb(VALUE self, VALUE callback) {
  564
+    yajl_parser_wrapper * wrapper;
  565
+    GetParser(self, wrapper);
  566
+    wrapper->parse_nested_callback = callback;
  567
+    return Qnil;
  568
+}
  569
+
  570
+/*
528 571
  * Document-class: Yajl::Encoder
529 572
  *
530 573
  * This class contains methods for encoding a Ruby object into JSON, streaming its output into an IO object.
@@ -866,6 +909,7 @@ void Init_yajl() {
866 909
     rb_define_method(cParser, "parse_chunk", rb_yajl_parser_parse_chunk, 1);
867 910
     rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1);
868 911
     rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1);
  912
+    rb_define_method(cParser, "on_parse_nested=", rb_yajl_parser_set_nested_cb, 1);
869 913
 
870 914
     cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject);
871 915
     rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1);
@@ -879,6 +923,7 @@ void Init_yajl() {
879 923
     intern_call = rb_intern("call");
880 924
     intern_keys = rb_intern("keys");
881 925
     intern_to_s = rb_intern("to_s");
  926
+    intern_to_i = rb_intern("to_i");
882 927
     intern_to_json = rb_intern("to_json");
883 928
     intern_to_sym = rb_intern("to_sym");
884 929
     intern_has_key = rb_intern("has_key?");
@@ -891,6 +936,8 @@ void Init_yajl() {
891 936
     sym_html_safe = ID2SYM(rb_intern("html_safe"));
892 937
     sym_terminator = ID2SYM(rb_intern("terminator"));
893 938
     sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
  939
+    sym_process_nested_callback = ID2SYM(rb_intern("process_nested"));
  940
+    sym_nested_callback_depth = ID2SYM(rb_intern("nested_depth"));
894 941
 
895 942
 #ifdef HAVE_RUBY_ENCODING_H
896 943
     utf8Encoding = rb_utf8_encoding();
11  ext/yajl/yajl_ext.h
@@ -49,8 +49,11 @@ static rb_encoding *utf8Encoding;
49 49
 
50 50
 static VALUE cParseError, cEncodeError, mYajl, cParser, cEncoder;
51 51
 static ID intern_io_read, intern_call, intern_keys, intern_to_s,
52  
-            intern_to_json, intern_has_key, intern_to_sym, intern_as_json;
53  
-static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, sym_terminator, sym_symbolize_keys, sym_html_safe;
  52
+            intern_to_json, intern_has_key, intern_to_sym, intern_as_json, 
  53
+            intern_to_i;
  54
+static ID sym_allow_comments, sym_check_utf8, sym_pretty, sym_indent, 
  55
+            sym_terminator, sym_symbolize_keys, sym_html_safe, 
  56
+            sym_process_nested_callback, sym_nested_callback_depth;
54 57
 
55 58
 #define GetParser(obj, sval) (sval = (yajl_parser_wrapper*)DATA_PTR(obj));
56 59
 #define GetEncoder(obj, sval) (sval = (yajl_encoder_wrapper*)DATA_PTR(obj));
@@ -87,10 +90,13 @@ static yajl_callbacks callbacks = {
87 90
 typedef struct {
88 91
     VALUE builderStack;
89 92
     VALUE parse_complete_callback;
  93
+    VALUE parse_nested_callback;
90 94
     int nestedArrayLevel;
91 95
     int nestedHashLevel;
92 96
     int objectsFound;
93 97
     int symbolizeKeys;
  98
+    int processNestedCallback;
  99
+    int nestedCallbackDepth;
94 100
     yajl_handle parser;
95 101
 } yajl_parser_wrapper;
96 102
 
@@ -106,6 +112,7 @@ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self);
106 112
 static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self);
107 113
 static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk);
108 114
 static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback);
  115
+static VALUE rb_yajl_parser_set_nested_cb(VALUE self, VALUE callback);
109 116
 static void yajl_parser_wrapper_free(void * wrapper);
110 117
 static void yajl_parser_wrapper_mark(void * wrapper);
111 118
 
48  spec/parsing/nested_spec.rb
... ...
@@ -0,0 +1,48 @@
  1
+# encoding: UTF-8
  2
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
  3
+
  4
+describe "Nested parsing" do
  5
+  before(:each) do
  6
+    @nested_callback = lambda { |hash,depth|
  7
+      # no-op
  8
+    }
  9
+  end
  10
+
  11
+  it "should parse a single nested hash" do
  12
+    @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 1)
  13
+    @parser.on_parse_nested = @nested_callback
  14
+    
  15
+    @nested_callback.should_receive(:call).with({"abc" => 123},1)
  16
+    @parser << '[{"abc": 123}]'
  17
+  end
  18
+  
  19
+  
  20
+  it "should parse a two-layer array" do
  21
+    @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 2)
  22
+    @parser.on_parse_nested = @nested_callback
  23
+    
  24
+    @nested_callback.should_receive(:call).with({"abc" => 123},2)
  25
+    @nested_callback.should_receive(:call).with([{"abc" => 123}],1)
  26
+    @parser << '[[{"abc": 123}]]'
  27
+  end
  28
+  
  29
+  
  30
+  it "should parse a single-layer array multiple times" do
  31
+    @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 1)
  32
+    @parser.on_parse_nested = @nested_callback
  33
+    
  34
+    @nested_callback.should_receive(:call).with({"abc" => 123},1)
  35
+    @nested_callback.should_receive(:call).with({"def" => 456},1)
  36
+    @parser << '[{"abc": 123},{"def": 456}]'
  37
+  end
  38
+  
  39
+  
  40
+  it "should handle a nested depth of 0" do
  41
+    @parser = Yajl::Parser.new(:process_nested => true, :nested_depth => 0)
  42
+    @parser.on_parse_nested = @nested_callback
  43
+    
  44
+    @nested_callback.should_receive(:call).with({"abc" => 123},2)
  45
+    @nested_callback.should_receive(:call).with([{"abc" => 123}],1)
  46
+    @parser << '[[{"abc": 123}]]'
  47
+  end
  48
+end
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.