From dc279b4859273caa6d6f8a8937fc4b714ca14328 Mon Sep 17 00:00:00 2001 From: John Mettraux Date: Sun, 15 Apr 2012 17:49:02 +0900 Subject: [PATCH] get rid of Context Since the source is already getting passed around, let's put the memoization cache into it. Planning to store more in the source. --- example/ignore.rb | 4 +-- experiments/heredoc.rb | 24 +++++++-------- lib/parslet/atoms.rb | 1 - lib/parslet/atoms/alternative.rb | 4 +-- lib/parslet/atoms/base.rb | 15 +++------ lib/parslet/atoms/context.rb | 48 ----------------------------- lib/parslet/atoms/entity.rb | 4 +-- lib/parslet/atoms/lookahead.rb | 4 +-- lib/parslet/atoms/named.rb | 4 +-- lib/parslet/atoms/re.rb | 2 +- lib/parslet/atoms/repetition.rb | 4 +-- lib/parslet/atoms/sequence.rb | 4 +-- lib/parslet/atoms/str.rb | 2 +- lib/parslet/parser.rb | 4 +-- lib/parslet/source.rb | 26 ++++++++++++++++ spec/parslet/atoms/base_spec.rb | 53 ++++++++++++++++---------------- spec/parslet/atoms_spec.rb | 15 +++++---- 17 files changed, 94 insertions(+), 124 deletions(-) delete mode 100644 lib/parslet/atoms/context.rb diff --git a/example/ignore.rb b/example/ignore.rb index d4a1a28..e03012f 100644 --- a/example/ignore.rb +++ b/example/ignore.rb @@ -10,8 +10,8 @@ def initialize(parslet) def to_s_inner(prec) @parslet.to_s(prec) end - def try(source, context) - result = @parslet.try(source, context) + def try(source) + result = @parslet.try(source) return success(nil) unless result.error? return result diff --git a/experiments/heredoc.rb b/experiments/heredoc.rb index 1dfa2aa..78bd551 100644 --- a/experiments/heredoc.rb +++ b/experiments/heredoc.rb @@ -29,18 +29,18 @@ def initialize(parslet, name) @parslet, @name = parslet, name end - def try(source, context) # :nodoc: - parslet.try(source, context).tap { |result| - set_binding(context, name, + def try(source) # :nodoc: + parslet.try(source).tap { |result| + set_binding(source, name, flatten(result.result)) } end - def set_binding(context, name, value) - b = context.instance_variable_get('@bindings') || {} + def set_binding(source, name, value) + b = source.instance_variable_get('@bindings') || {} b.store name, value p b - context.instance_variable_set('@bindings', b) + source.instance_variable_set('@bindings', b) end def to_s_inner(prec) # :nodoc: @@ -55,13 +55,13 @@ def initialize(parslet, name) @parslet, @name = parslet, name end - def try(source, context) # :nodoc: - parslet.try(source, context).tap { |result| + def try(source) # :nodoc: + parslet.try(source).tap { |result| unless result.error? value = flatten(result.result) - p [value, bound_value(context, name), value == bound_value(context, name)] - unless value == bound_value(context, name) + p [value, bound_value(source, name), value == bound_value(source, name)] + unless value == bound_value(source, name) p :error_return return error(source, "Bound value doesn't match.") end @@ -69,8 +69,8 @@ def try(source, context) # :nodoc: } end - def bound_value(context, name) - b = context.instance_variable_get('@bindings') || {} + def bound_value(source, name) + b = source.instance_variable_get('@bindings') || {} b[name] end diff --git a/lib/parslet/atoms.rb b/lib/parslet/atoms.rb index ea731f7..a57e650 100644 --- a/lib/parslet/atoms.rb +++ b/lib/parslet/atoms.rb @@ -16,7 +16,6 @@ module Precedence # :nodoc: end require 'parslet/atoms/can_flatten' - require 'parslet/atoms/context' require 'parslet/atoms/dsl' require 'parslet/atoms/base' require 'parslet/atoms/named' diff --git a/lib/parslet/atoms/alternative.rb b/lib/parslet/atoms/alternative.rb index 1e3a6eb..67a4f45 100644 --- a/lib/parslet/atoms/alternative.rb +++ b/lib/parslet/atoms/alternative.rb @@ -30,9 +30,9 @@ def |(parslet) # :nodoc: self.class.new(*@alternatives + [parslet]) end - def try(source, context) # :nodoc: + def try(source) # :nodoc: alternatives.each { |a| - value = a.apply(source, context) + value = a.apply(source) return value unless value.error? } # If we reach this point, all alternatives have failed. diff --git a/lib/parslet/atoms/base.rb b/lib/parslet/atoms/base.rb index 3e38817..fdfc79d 100644 --- a/lib/parslet/atoms/base.rb +++ b/lib/parslet/atoms/base.rb @@ -31,10 +31,8 @@ def parse(io, prefix_parse=false) io : Parslet::Source.new(io) - context = Parslet::Atoms::Context.new - result = nil - value = apply(source, context) + value = apply(source) # If we didn't succeed the parse, raise an exception for the user. # Stack trace will be off, but the error tree should explain the reason @@ -72,12 +70,10 @@ def parse(io, prefix_parse=false) # Calls the #try method of this parslet. In case of a parse error, apply # leaves the source in the state it was before the attempt. #+++ - def apply(source, context) # :nodoc: + def apply(source) # :nodoc: old_pos = source.pos - result = context.cache(self, source) { - try(source, context) - } + result = source.try(self) # This has just succeeded, so last_cause must be empty unless result.error? @@ -93,12 +89,11 @@ def apply(source, context) # :nodoc: # Override this in your Atoms::Base subclasses to implement parsing # behaviour. # - def try(source, context) + def try(source) raise NotImplementedError, \ - "Atoms::Base doesn't have behaviour, please implement #try(source, context)." + "Atoms::Base doesn't have behaviour, please implement #try(source)." end - # Debug printing - in Treetop syntax. # def self.precedence(prec) # :nodoc: diff --git a/lib/parslet/atoms/context.rb b/lib/parslet/atoms/context.rb deleted file mode 100644 index da1971b..0000000 --- a/lib/parslet/atoms/context.rb +++ /dev/null @@ -1,48 +0,0 @@ -module Parslet::Atoms - # Helper class that implements a transient cache that maps position and - # parslet object to results. This is used for memoization in the packrat - # style. - # - class Context - def initialize - @cache = Hash.new { |h, k| h[k] = {} } - end - - # Caches a parse answer for obj at source.pos. Applying the same parslet - # at one position of input always yields the same result, unless the input - # has changed. - # - # We need the entire source here so we can ask for how many characters - # were consumed by a successful parse. Imitation of such a parse must - # advance the input pos by the same amount of bytes. - # - def cache(obj, source, &block) - beg = source.pos - - # Not in cache yet? Return early. - unless entry = lookup(obj, beg) - result = yield - - set obj, beg, [result, source.pos-beg] - return result - end - - # the condition in unless has returned true, so entry is not nil. - result, advance = entry - - # The data we're skipping here has been read before. (since it is in - # the cache) PLUS the actual contents are not interesting anymore since - # we know obj matches at beg. So skip reading. - source.pos = beg + advance - return result - end - - private - def lookup(obj, pos) - @cache[pos][obj] - end - def set(obj, pos, val) - @cache[pos][obj] = val - end - end -end diff --git a/lib/parslet/atoms/entity.rb b/lib/parslet/atoms/entity.rb index f5606af..9931bf3 100644 --- a/lib/parslet/atoms/entity.rb +++ b/lib/parslet/atoms/entity.rb @@ -17,8 +17,8 @@ def initialize(name, &block) # :nodoc: @block = block end - def try(source, context) # :nodoc: - parslet.apply(source, context) + def try(source) # :nodoc: + parslet.apply(source) end def parslet diff --git a/lib/parslet/atoms/lookahead.rb b/lib/parslet/atoms/lookahead.rb index 1f6cfdb..3bcaf3b 100644 --- a/lib/parslet/atoms/lookahead.rb +++ b/lib/parslet/atoms/lookahead.rb @@ -21,10 +21,10 @@ def initialize(bound_parslet, positive=true) # :nodoc: } end - def try(source, context) # :nodoc: + def try(source) # :nodoc: pos = source.pos - value = bound_parslet.apply(source, context) + value = bound_parslet.apply(source) return success(nil) if positive ^ value.error? return error(source, @error_msgs[:positive], pos) if positive diff --git a/lib/parslet/atoms/named.rb b/lib/parslet/atoms/named.rb index a98d3bf..40efb2d 100644 --- a/lib/parslet/atoms/named.rb +++ b/lib/parslet/atoms/named.rb @@ -13,8 +13,8 @@ def initialize(parslet, name) # :nodoc: @parslet, @name = parslet, name end - def apply(source, context) # :nodoc: - value = parslet.apply(source, context) + def apply(source) # :nodoc: + value = parslet.apply(source) return value if value.error? success( diff --git a/lib/parslet/atoms/re.rb b/lib/parslet/atoms/re.rb index ab78408..a869040 100644 --- a/lib/parslet/atoms/re.rb +++ b/lib/parslet/atoms/re.rb @@ -20,7 +20,7 @@ def initialize(match) # :nodoc: } end - def try(source, context) # :nodoc: + def try(source) # :nodoc: error_pos = source.pos s = source.read(1) diff --git a/lib/parslet/atoms/repetition.rb b/lib/parslet/atoms/repetition.rb index 3c1fbe4..25a3c14 100644 --- a/lib/parslet/atoms/repetition.rb +++ b/lib/parslet/atoms/repetition.rb @@ -19,12 +19,12 @@ def initialize(parslet, min, max, tag=:repetition) } end - def try(source, context) # :nodoc: + def try(source) # :nodoc: occ = 0 result = [@tag] # initialize the result array with the tag (for flattening) start_pos = source.pos loop do - value = parslet.apply(source, context) + value = parslet.apply(source) break if value.error? occ += 1 diff --git a/lib/parslet/atoms/sequence.rb b/lib/parslet/atoms/sequence.rb index f6a2d8b..27c172f 100644 --- a/lib/parslet/atoms/sequence.rb +++ b/lib/parslet/atoms/sequence.rb @@ -19,12 +19,12 @@ def >>(parslet) # :nodoc: self.class.new(* @parslets+[parslet]) end - def try(source, context) # :nodoc: + def try(source) # :nodoc: success([:sequence]+parslets.map { |p| # Save each parslet as potentially offending (raising an error). @offending_parslet = p - value = p.apply(source, context) + value = p.apply(source) return error(source, @error_msgs[:failed]) if value.error? diff --git a/lib/parslet/atoms/str.rb b/lib/parslet/atoms/str.rb index 5d2fdd7..9ea86b5 100644 --- a/lib/parslet/atoms/str.rb +++ b/lib/parslet/atoms/str.rb @@ -16,7 +16,7 @@ def initialize(str) } end - def try(source, context) # :nodoc: + def try(source) # :nodoc: # NOTE: Even though it doesn't look that way, this is the hotspot, the # contents of parslets inner loop. Changes here affect parslets speed # enormously. diff --git a/lib/parslet/parser.rb b/lib/parslet/parser.rb index f5ee9f1..8bba716 100644 --- a/lib/parslet/parser.rb +++ b/lib/parslet/parser.rb @@ -57,8 +57,8 @@ def root(name) end end - def try(source, context) # :nodoc: - root.try(source, context) + def try(source) # :nodoc: + root.try(source) end def error_tree # :nodoc: diff --git a/lib/parslet/source.rb b/lib/parslet/source.rb index 6f7d06a..1ee2be0 100644 --- a/lib/parslet/source.rb +++ b/lib/parslet/source.rb @@ -16,6 +16,32 @@ def initialize(io) @io = io @line_cache = LineCache.new + + @memo_cache = Hash.new { |h, k| h[k] = {} } + end + + # Wrapping atom.try(source) in a memoizing embrace. + # + def try(atom) + beg = self.pos + + # Not in cache yet? Return early. + unless entry = @memo_cache[beg][atom] + result = atom.try(self) + + @memo_cache[beg][atom] = [result, self.pos - beg] + return result + end + + # the condition in unless has returned true, so entry is not nil. + result, advance = entry + + # The data we're skipping here has been read before. (since it is in + # the cache) PLUS the actual contents are not interesting anymore since + # we know atom matches at beg. So skip reading. + self.pos = beg + advance + + result end # Reads n bytes from the input and returns a Range instance. If the n diff --git a/spec/parslet/atoms/base_spec.rb b/spec/parslet/atoms/base_spec.rb index 9027c57..439d180 100644 --- a/spec/parslet/atoms/base_spec.rb +++ b/spec/parslet/atoms/base_spec.rb @@ -2,20 +2,19 @@ describe Parslet::Atoms::Base do let(:parslet) { Parslet::Atoms::Base.new } - let(:context) { Parslet::Atoms::Context.new } describe "<- #try(io)" do it "should raise NotImplementedError" do lambda { - parslet.try(flexmock(:io), context) + parslet.try(flexmock(:io)) }.should raise_error(NotImplementedError) - end + end end describe "<- #error_tree" do it "should always return a tree" do parslet.cause.should be_nil parslet.error_tree.should_not be_nil - end + end end describe "<- #flatten_sequence" do [ @@ -25,16 +24,16 @@ ['a', 'b'], 'ab', # S S [['a'], ['b']], ['a', 'b'], # A A [{:a=>'a'}, {:b=>'b'}], {:a=>'a',:b=>'b'}, # H H - + [{:a=>'a'}, ['a']], [{:a=>'a'}, 'a'], # H A [{:a=>'a'}, 's'], {:a=>'a'}, # H S [['a'], {:a=>'a'}], ['a', {:a=>'a'}], # A H (symmetric to H A) - [['a'], 'b'], ['a'], # A S + [['a'], 'b'], ['a'], # A S ['a', {:b=>'b'}], {:b=>'b'}, # S H (symmetric to H S) ['a', ['b']], ['b'], # S A (symmetric to A S) - + [nil, ['a']], ['a'], # handling of lhs nil [nil, {:a=>'a'}], {:a=>'a'}, [['a'], nil], ['a'], # handling of rhs nil @@ -51,30 +50,30 @@ def unnamed(obj) parslet.flatten_repetition(obj, false) end - + it "should give subtrees precedence" do unnamed([[{:a=>"a"}, {:m=>"m"}], {:a=>"a"}]).should == [{:a=>"a"}] - end + end end describe '#parse(source)' do context "when given something that looks like a source" do - let(:source) { flexmock("source lookalike", - :line_and_column => [1,2], - :pos => 1, + let(:source) { flexmock("source lookalike", + :line_and_column => [1, 2], + :pos => 1, :eof? => true) } - + it "should not rewrap in a source" do flexmock(Parslet::Source). should_receive(:new => :source_created).never - + begin - parslet.parse(source) - rescue NotImplementedError + parslet.parse(source) + rescue NoMethodError end - end + end end end - + context "when the parse fails, the exception" do it "should contain a string" do begin @@ -82,7 +81,7 @@ def unnamed(obj) rescue Parslet::ParseFailed => ex ex.message.should be_kind_of(String) end - end + end end context "when not all input is consumed" do let(:parslet) { Parslet.str('foo') } @@ -92,28 +91,28 @@ def unnamed(obj) rescue Parslet::ParseFailed => ex ex.message.should == "Don't know what to do with bar at line 1 char 4." end - end + end end context "when a match succeeds" do context "when there is an error from a previous run" do before(:each) do catch(:error) { - parslet.send(:error, Parslet::Source.new('test'), 'cause') + parslet.send(:error, Parslet::Source.new('test'), 'cause') } parslet.cause.should == "cause at line 1 char 1." end - + it "should reset the #cause to nil" do success = flexmock(:success, :error? => false) flexmock(parslet). should_receive(:try => success) - - parslet.apply(Parslet::Source.new(''), context) - + + parslet.apply(Parslet::Source.new('')) + parslet.cause?.should == false parslet.cause.should be_nil - end + end end end -end \ No newline at end of file +end diff --git a/spec/parslet/atoms_spec.rb b/spec/parslet/atoms_spec.rb index ef13b4a..8bcd250 100644 --- a/spec/parslet/atoms_spec.rb +++ b/spec/parslet/atoms_spec.rb @@ -12,7 +12,6 @@ def not_parse extend Parslet def src(str); Parslet::Source.new str; end - let(:context) { Parslet::Atoms::Context.new } describe "match('[abc]')" do attr_reader :parslet @@ -92,7 +91,7 @@ def src(str); Parslet::Source.new str; end end it "should leave pos untouched if there is no foo" do source = src('bar') - parslet.apply(source, context) + parslet.apply(source) source.pos.should == 0 end it "should inspect as 'foo'?" do @@ -178,11 +177,11 @@ def src(str); Parslet::Source.new str; end end context "when fed 'foo'" do it "should parse" do - parslet.apply(src('foo'), context).should_not be_error + parslet.apply(src('foo')).should_not be_error end it "should not change input position" do source = src('foo') - parslet.apply(source, context) + parslet.apply(source) source.pos.should == 0 end end @@ -193,7 +192,7 @@ def src(str); Parslet::Source.new str; end end describe "<- #parse" do it "should return nil" do - parslet.apply(src('foo'), context).result.should == nil + parslet.apply(src('foo')).result.should == nil end end end @@ -208,11 +207,11 @@ def src(str); Parslet::Source.new str; end end context "when fed 'bar'" do it "should parse" do - parslet.apply(src('bar'), context).should_not be_error + parslet.apply(src('bar')).should_not be_error end it "should not change input position" do source = src('bar') - parslet.apply(source, context) + parslet.apply(source) source.pos.should == 0 end end @@ -250,7 +249,7 @@ def src(str); Parslet::Source.new str; end end it "should consume one char" do source = src('foo') - parslet.apply(source, context) + parslet.apply(source) source.pos.should == 1 end end