From 7afc9d08f76e1c860f797df71cc5e28079b4b958 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 13 Jun 2020 14:04:40 -0700 Subject: [PATCH] Change order of callback parameters for `terminal` and `production` for PEG parser, which allows seldom-used callback parameters to be left out. --- examples/ebnf-peg-parser/parser.rb | 50 ++++++++++++++---------------- lib/ebnf/peg/parser.rb | 34 ++++++++++---------- spec/peg/data/parser.rb | 46 +++++++++++++-------------- 3 files changed, 63 insertions(+), 67 deletions(-) diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 675cafa..384917a 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -38,72 +38,72 @@ def inspect # # Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream. # - # The result of the terminal is the semantic value of that terminal, which if often a string, but may be any instance which reflects the semantic interpretation of that terminal. + # The result of the terminal block is the semantic value of that terminal, which if often a string, but may be any instance which reflects the semantic interpretation of that terminal. # # The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise. # # The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example. + # + # If no block is provided, then the value which would have been passed to the block is used as the result directly. # Match the Left hand side of a rule or terminal # # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? 
SYMBOL ' '* '::=' - terminal(:LHS, LHS) do |prod, value| + terminal(:LHS, LHS) do |value, prod| value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first end # Match `SYMBOL` terminal # # [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+ - terminal(:SYMBOL, SYMBOL) do |prod, value| + terminal(:SYMBOL, SYMBOL) do |value| value.to_sym end # Match `HEX` terminal # # [13] HEX ::= #x' ([a-f] | [A-F] | [0-9])+ - terminal(:HEX, HEX) do |prod, value| - value - end + terminal(:HEX, HEX) # Terminal for `ENUM` is matched as part of a `primary` rule. # # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS - terminal(:ENUM, ENUM) do |prod, value| + terminal(:ENUM, ENUM) do |value| [:range, value[1..-2]] end # Terminal for `O_ENUM` is matched as part of a `primary` rule. # # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - terminal(:O_ENUM, O_ENUM) do |prod, value| + terminal(:O_ENUM, O_ENUM) do |value| [:range, value[1..-2]] end # Terminal for `RANGE` is matched as part of a `primary` rule. # # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' - terminal(:RANGE, RANGE) do |prod, value| + terminal(:RANGE, RANGE) do |value| [:range, value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. 
# # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' - terminal(:O_RANGE, O_RANGE) do |prod, value| + terminal(:O_RANGE, O_RANGE) do |value| [:range, value[1..-2]] end # Match double quote string # # [18] STRING1 ::= '"' (CHAR - '"')* '"' - terminal(:STRING1, STRING1) do |prod, value| + terminal(:STRING1, STRING1) do |value| value[1..-2] end # Match single quote string # # [19] STRING2 ::= "'" (CHAR - "'")* "'" - terminal(:STRING2, STRING2) do |prod, value| + terminal(:STRING2, STRING2) do |value| value[1..-2] end @@ -112,9 +112,7 @@ def inspect # Match `POSTFIX` terminal # # [22] POSTFIX ::= [?*+] - terminal(:POSTFIX, POSTFIX) do |prod, value| - value - end + terminal(:POSTFIX, POSTFIX) # The `PASS` productions is not used explicitly @@ -138,7 +136,7 @@ def inspect # `@pass` is ignored here. # # [2] declaration ::= '@terminals' | pass - production(:declaration) do |data, value, callback| + production(:declaration) do |value, data, callback| # value contains a declaration. # Invoke callback callback.call(:terminal) if value == '@terminals' @@ -151,7 +149,7 @@ def inspect # Create rule from expression value and pass to callback # # [3] rule ::= LHS expression - production(:rule) do |data, value, callback| + production(:rule) do |value, data, callback| # value contains an expression. # Invoke callback id, sym = value.first[:LHS] @@ -168,7 +166,7 @@ def inspect # [:alt foo bar] => [:alt foo bar] # # [4] expression ::= alt - production(:expression) do |data, value| + production(:expression) do |value| value.first[:alt] end @@ -183,7 +181,7 @@ def inspect # Note that this also may just pass through from `_alt_1` # # [5] alt ::= seq ('|' seq)* - production(:alt) do |data, value| + production(:alt) do |value| if value.last[:_alt_1].length > 0 [:alt, value.first[:seq]] + value.last[:_alt_1] else @@ -197,7 +195,7 @@ def inspect # The `value` parameter, is of the form `[{seq: ["v"]}]`. 
# # [5] _alt_1 ::= ('|' seq)* - production(:_alt_1) do |data, value| + production(:_alt_1) do |value| value.map {|a1| a1.last[:seq]}.compact # Get rid of '|' end @@ -212,7 +210,7 @@ def inspect # Note that this also may just pass through from `_seq_1` # # [6] seq ::= diff+ - production(:seq) do |data, value| + production(:seq) do |value| value.length == 1 ? value.first : ([:seq] + value) end @@ -221,7 +219,7 @@ def inspect # The `value` parameter, is of the form `[{postfix: "v"}, {_diff_1: "v"}]`. # # [7] diff ::= postfix ('-' postfix)? - production(:diff) do |data, value| + production(:diff) do |value| if value.last[:_diff_1] [:diff, value.first[:postfix], value.last[:_diff_1]] else @@ -229,7 +227,7 @@ def inspect end end - production(:_diff_1) do |data, value| + production(:_diff_1) do |value| value.last[:postfix] if value end @@ -244,7 +242,7 @@ def inspect # [:primary, '?'] => [:opt, :primary] # # [8] postfix ::= primary POSTFIX? - production(:postfix) do |data, value| + production(:postfix) do |value| # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively case value.last[:_postfix_1] when "*" then [:star, value.first[:primary]] @@ -270,14 +268,14 @@ def inspect # | STRING1 # | STRING2 # | '(' expression ')' - production(:primary) do |data, value| + production(:primary) do |value| Array(value).length > 2 ? value[1][:expression] : value end # Production for end of pass non-terminal. # # [10] pass ::= '@pass' expression - production(:pass) do |data, value, callback| + production(:pass) do |value, data, callback| # Invoke callback callback.call(:pass, value.last[:expression]) end diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index 125c8cc..9cdb5dc 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -24,6 +24,8 @@ def terminal_regexps; (@terminal_regexps ||= {}); end # to a previous production. Block is called in an evaluation block from # the enclosing parser. 
# + # If no block is provided, then the value which would have been passed to the block is used as the result directly. + # # @param [Symbol] term # The terminal name. # @param [Regexp] regexp (nil) @@ -36,17 +38,17 @@ def terminal_regexps; (@terminal_regexps ||= {}); end # their canonical value # @option options [Boolean] :unescape # Cause strings and codepoints to be unescaped. - # @yield [term, value] - # @yieldparam [Symbol] term - # A symbol indicating the production which referenced this terminal + # @yield [value, prod] # @yieldparam [String] value # The scanned terminal value. + # @yieldparam [Symbol] prod + # A symbol indicating the production which referenced this terminal # @yieldparam [Proc] block # Block passed to initialization for yielding to calling parser. # Should conform to the yield specs for #initialize def terminal(term, regexp = nil, **options, &block) terminal_regexps[term] = regexp if regexp - terminal_handlers[term] = block + terminal_handlers[term] = block if block_given? end ## @@ -79,13 +81,13 @@ def start_production(term, &block) # # @param [Symbol] term # Term which is a key in the branch table - # @yield [data, result, block] + # @yield [result, data, block] + # @yieldparam [Object] result + # The result from sucessfully parsing the production. # @yieldparam [Hash] data # A Hash defined for the current production, during :start # may be initialized with data to pass to further productions, # during :finish, it contains data placed by earlier productions - # @yieldparam [Object] result - # The result from sucessfully parsing the production. # @yieldparam [Proc] block # Block passed to initialization for yielding to calling parser. 
# Should conform to the yield specs for #initialize @@ -336,7 +338,7 @@ def onFinish(result, scanner: nil) data = @prod_data.pop result = begin self.class.eval_with_binding(self) { - handler.call(data, result, @parse_callback) + handler.call(result, data, @parse_callback) } rescue ArgumentError, Error => e error("finish", "#{e.class}: #{e.message}", production: prod) @@ -351,23 +353,23 @@ def onFinish(result, scanner: nil) # A terminal with a defined handler # # @param [Symbol] prod from the symbol of the associated rule - # @param [String] token the scanned string + # @param [String] value the scanned string # @return [String, Object] either the result from the handler, or the token - def onTerminal(prod, token, scanner: nil) + def onTerminal(prod, value, scanner: nil) parentProd = @productions.last handler = self.class.terminal_handlers[prod] - if handler && token != :unmatched - token = begin + if handler && value != :unmatched + value = begin self.class.eval_with_binding(self) { - handler.call(parentProd, token, @parse_callback) + handler.call(value, parentProd, @parse_callback) } rescue ArgumentError, Error => e - error("terminal", "#{e.class}: #{e.message}", token: token, production: prod) + error("terminal", "#{e.class}: #{e.message}", value: value, production: prod) @recovering = false end end - progress("#{prod}(:terminal)", "", depth: (depth + 2)) {"#{prod}: #{token.inspect}, lineno: #{scanner ? scanner.lineno : '?'}, pos: #{scanner ? scanner.pos : '?'}"} - token + progress("#{prod}(:terminal)", "", depth: (depth + 2)) {"#{prod}: #{value.inspect}, lineno: #{scanner ? scanner.lineno : '?'}, pos: #{scanner ? 
scanner.pos : '?'}"} + value end ## diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index 7d433e9..0e6a6d6 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -15,54 +15,50 @@ class EBNFPegParser # @return [Array] attr_reader :ast - terminal(:LHS, LHS) do |prod, value| + terminal(:LHS, LHS) do |value| # [id symbol] value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first end - terminal(:SYMBOL, SYMBOL) do |prod, value| + terminal(:SYMBOL, SYMBOL) do |value| value.to_sym end - terminal(:HEX, HEX) do |prod, value| - value - end + terminal(:HEX, HEX) - terminal(:ENUM, ENUM, unescape: true) do |prod, value| + terminal(:ENUM, ENUM, unescape: true) do |value| [:range, value[1..-2]] end - terminal(:O_ENUM, O_ENUM, unescape: true) do |prod, value| + terminal(:O_ENUM, O_ENUM, unescape: true) do |value| [:range, value[1..-2]] end - terminal(:RANGE, RANGE, unescape: true) do |prod, value| + terminal(:RANGE, RANGE, unescape: true) do |value| [:range, value[1..-2]] end - terminal(:O_RANGE, O_RANGE, unescape: true) do |prod, value| + terminal(:O_RANGE, O_RANGE, unescape: true) do |value| [:range, value[1..-2]] end - terminal(:STRING1, STRING1, unescape: true) do |prod, value| + terminal(:STRING1, STRING1, unescape: true) do |value| value[1..-2] end - terminal(:STRING2, STRING2, unescape: true) do |prod, value| + terminal(:STRING2, STRING2, unescape: true) do |value| value[1..-2] end - terminal(:POSTFIX, POSTFIX) do |prod, value| - value - end + terminal(:POSTFIX, POSTFIX) - production(:declaration) do |data, value, callback| + production(:declaration) do |value, data, callback| # current contains a declaration. # Invoke callback callback.call(:terminal) if value == '@terminals' end - production(:rule) do |data, value, callback| + production(:rule) do |value, data, callback| # current contains an expression. 
# Invoke callback id, sym = value.first[:LHS] @@ -70,11 +66,11 @@ class EBNFPegParser callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression)) end - production(:expression) do |data, value, callback| + production(:expression) do |value| value.first[:alt] end - production(:alt) do |data, value, callback| + production(:alt) do |value| if value.last[:_alt_1].length > 0 [:alt, value.first[:seq]] + value.last[:_alt_1] else @@ -82,15 +78,15 @@ class EBNFPegParser end end - production(:_alt_1) do |data, value, callback| + production(:_alt_1) do |value| value.map {|a1| a1.last[:seq]}.compact # Get rid of '|' end - production(:seq) do |data, value| + production(:seq) do |value| value.length == 1 ? value.first : ([:seq] + value) end - production(:diff) do |data, value, callback| + production(:diff) do |value| if value.last[:_diff_1] [:diff, value.first[:postfix], value.last[:_diff_1]] else @@ -98,11 +94,11 @@ class EBNFPegParser end end - production(:_diff_1) do |data, value, callback| + production(:_diff_1) do |value| value.last[:postfix] if value end - production(:postfix) do |data, value, callback| + production(:postfix) do |value| # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively case value.last[:_postfix_1] when "*" then [:star, value.first[:primary]] @@ -112,11 +108,11 @@ class EBNFPegParser end end - production(:primary) do |data, value, callback| + production(:primary) do |value| Array(value).length > 2 ? value[1][:expression] : value end - production(:pass) do |data, value, callback| + production(:pass) do |value, data, callback| # Invoke callback callback.call(:pass, value.last[:expression]) end