diff --git a/lib/json_sequence/parser.rb b/lib/json_sequence/parser.rb index 94b2cac..b299ed2 100644 --- a/lib/json_sequence/parser.rb +++ b/lib/json_sequence/parser.rb @@ -18,7 +18,7 @@ def parse(chunk, &block) # Takes a String buffer to parse and returns String containing any # text remaining to parse when more data is available. def do_parse(buffer) - records = buffer.split(RS) + records = buffer.split(RS, -1) # -1 stops suppression of trailing null fields records.each_with_index do |record, i| # RFC7464 2.1 Multiple consecutive RS octets do not denote empty @@ -28,27 +28,29 @@ def do_parse(buffer) # Try to decode the record begin value = MultiJson.load(record) - result = handle_parsed(record, value) + result, remaining = handle_parsed(record, value, is_last_record: i == records.size - 1) rescue MultiJson::ParseError => err result, remaining = handle_err(record, err, is_last_record: i == records.size - 1) - return remaining if result.nil? end + return remaining if result.nil? yield result end '' end - def handle_parsed(record, value) + def handle_parsed(record, value, is_last_record:) case value when Numeric, TrueClass, FalseClass, NilClass # Check for truncation, if record was parsed but doesn't end in # whitespace it may be truncated - return JsonSequence::Result::MaybeTruncated.new(value) if record !~ /\s$/ + if record !~ /\s$/ + return is_last_record ? [nil, record] : [JsonSequence::Result::MaybeTruncated.new(value), ''] + end end - JsonSequence::Result::Json.new(value) + [JsonSequence::Result::Json.new(value), ''] end def handle_err(record, err, is_last_record:) diff --git a/spec/json_sequence/parser_spec.rb b/spec/json_sequence/parser_spec.rb index e368ecc..f1d5974 100644 --- a/spec/json_sequence/parser_spec.rb +++ b/spec/json_sequence/parser_spec.rb @@ -10,7 +10,7 @@ end it 'supports incremental parsing' do - expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_with_no_args + expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_control expect { |b| parser.parse(%|}\x0A|, &b) }.to yield_successive_args( JsonSequence::Result::Json.new('some' => 'json') ) @@ -23,7 +23,7 @@ end it 'parses multiple records at once' do - expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_with_no_args + expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_control expect { |b| parser.parse(%|}\x0A\x1E{"more": "json"}\x0A|, &b) }.to yield_successive_args( JsonSequence::Result::Json.new('some' => 'json'), JsonSequence::Result::Json.new('more' => 'json') @@ -31,7 +31,7 @@ end it 'yields invalid records and continues parsing' do - expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_with_no_args + expect { |b| parser.parse(%|\x1E{"some": "json"|, &b) }.not_to yield_control expect { |b| parser.parse(%|\x0A\x1E{"more": "json"}\x0A|, &b) }.to yield_successive_args( JsonSequence::Result::ParseError, JsonSequence::Result::Json.new('more' => 'json') @@ -53,14 +53,32 @@ end it 'reports possibly trunctated values' do - expect { |b| parser.parse(%|\x1E123|, &b) }.to yield_successive_args( + expect { |b| parser.parse(%|\x1E123|, &b) }.not_to yield_control + expect { |b| parser.parse(%|\x1E|, &b) }.to yield_successive_args( JsonSequence::Result::MaybeTruncated.new(123), ) end + it "doesn't report trunctated values when value is split across chunks" do + expect { |b| parser.parse(%|\x1E123|, &b) }.not_to yield_control + expect { |b| parser.parse(%|456\x0A|, &b) }.to yield_successive_args( + JsonSequence::Result::Json.new(123456), + ) + end + it 'parses formatted json' do expect { |b| parser.parse(%|\x1E{"some": "json",\n"more": 1,\n"even more": []}\x0A|, &b) }.to yield_successive_args( JsonSequence::Result::Json.new('some' => 'json', 'more' => 1, 'even more' => []) ) end + + it 'handles many small chunks' do + expect { |b| parser.parse(%|\x1E{"|, &b) }.not_to yield_control + expect { |b| parser.parse(%|some|, &b) }.not_to yield_control + expect { |b| parser.parse(%|": "|, &b) }.not_to yield_control + expect { |b| parser.parse(%|js|, &b) }.not_to yield_control + expect { |b| parser.parse(%|on"}\x0A|, &b) }.to yield_successive_args( + JsonSequence::Result::Json.new('some' => 'json') + ) + end end