Skip to content

Commit

Permalink
Change order of callback parameters for terminal and production
Browse files Browse the repository at this point in the history
…for PEG parser, which allows seldom-used callback parameters to be left out.
  • Loading branch information
gkellogg committed Jun 13, 2020
1 parent c4a9ca4 commit 7afc9d0
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 67 deletions.
50 changes: 24 additions & 26 deletions examples/ebnf-peg-parser/parser.rb
Expand Up @@ -38,72 +38,72 @@ def inspect
#
# Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream.
#
# The result of the terminal is the semantic value of that terminal, which is often a string, but may be any instance which reflects the semantic interpretation of that terminal.
# The result of the terminal block is the semantic value of that terminal, which is often a string, but may be any instance which reflects the semantic interpretation of that terminal.
#
# The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise.
#
# The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example.
#
# If no block is provided, then the value which would have been passed to the block is used as the result directly.

# Match the Left hand side of a rule or terminal
#
# [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::='
terminal(:LHS, LHS) do |prod, value|
terminal(:LHS, LHS) do |value, prod|
value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first
end

# Match `SYMBOL` terminal
#
# [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+
terminal(:SYMBOL, SYMBOL) do |prod, value|
terminal(:SYMBOL, SYMBOL) do |value|
value.to_sym
end

# Match `HEX` terminal
#
# [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+
terminal(:HEX, HEX) do |prod, value|
value
end
terminal(:HEX, HEX)

# Terminal for `ENUM` is matched as part of a `primary` rule.
#
# [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS
terminal(:ENUM, ENUM) do |prod, value|
terminal(:ENUM, ENUM) do |value|
[:range, value[1..-2]]
end

# Terminal for `O_ENUM` is matched as part of a `primary` rule.
#
# [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']'
terminal(:O_ENUM, O_ENUM) do |prod, value|
terminal(:O_ENUM, O_ENUM) do |value|
[:range, value[1..-2]]
end

# Terminal for `RANGE` is matched as part of a `primary` rule.
#
# [16] RANGE ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
terminal(:RANGE, RANGE) do |prod, value|
terminal(:RANGE, RANGE) do |value|
[:range, value[1..-2]]
end

# Terminal for `O_RANGE` is matched as part of a `primary` rule.
#
# [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
terminal(:O_RANGE, O_RANGE) do |prod, value|
terminal(:O_RANGE, O_RANGE) do |value|
[:range, value[1..-2]]
end

# Match double quote string
#
# [18] STRING1 ::= '"' (CHAR - '"')* '"'
terminal(:STRING1, STRING1) do |prod, value|
terminal(:STRING1, STRING1) do |value|
value[1..-2]
end

# Match single quote string
#
# [19] STRING2 ::= "'" (CHAR - "'")* "'"
terminal(:STRING2, STRING2) do |prod, value|
terminal(:STRING2, STRING2) do |value|
value[1..-2]
end

Expand All @@ -112,9 +112,7 @@ def inspect
# Match `POSTFIX` terminal
#
# [22] POSTFIX ::= [?*+]
terminal(:POSTFIX, POSTFIX) do |prod, value|
value
end
terminal(:POSTFIX, POSTFIX)

# The `PASS` production is not used explicitly

Expand All @@ -138,7 +136,7 @@ def inspect
# `@pass` is ignored here.
#
# [2] declaration ::= '@terminals' | pass
production(:declaration) do |data, value, callback|
production(:declaration) do |value, data, callback|
# value contains a declaration.
# Invoke callback
callback.call(:terminal) if value == '@terminals'
Expand All @@ -151,7 +149,7 @@ def inspect
# Create rule from expression value and pass to callback
#
# [3] rule ::= LHS expression
production(:rule) do |data, value, callback|
production(:rule) do |value, data, callback|
# value contains an expression.
# Invoke callback
id, sym = value.first[:LHS]
Expand All @@ -168,7 +166,7 @@ def inspect
# [:alt foo bar] => [:alt foo bar]
#
# [4] expression ::= alt
production(:expression) do |data, value|
production(:expression) do |value|
value.first[:alt]
end

Expand All @@ -183,7 +181,7 @@ def inspect
# Note that this also may just pass through from `_alt_1`
#
# [5] alt ::= seq ('|' seq)*
production(:alt) do |data, value|
production(:alt) do |value|
if value.last[:_alt_1].length > 0
[:alt, value.first[:seq]] + value.last[:_alt_1]
else
Expand All @@ -197,7 +195,7 @@ def inspect
# The `value` parameter is of the form `[{seq: ["v"]}]`.
#
# [5] _alt_1 ::= ('|' seq)*
production(:_alt_1) do |data, value|
production(:_alt_1) do |value|
value.map {|a1| a1.last[:seq]}.compact # Get rid of '|'
end

Expand All @@ -212,7 +210,7 @@ def inspect
# Note that this also may just pass through from `_seq_1`
#
# [6] seq ::= diff+
production(:seq) do |data, value|
production(:seq) do |value|
value.length == 1 ? value.first : ([:seq] + value)
end

Expand All @@ -221,15 +219,15 @@ def inspect
# The `value` parameter is of the form `[{postfix: "v"}, {_diff_1: "v"}]`.
#
# [7] diff ::= postfix ('-' postfix)?
production(:diff) do |data, value|
production(:diff) do |value|
if value.last[:_diff_1]
[:diff, value.first[:postfix], value.last[:_diff_1]]
else
value.first[:postfix]
end
end

production(:_diff_1) do |data, value|
production(:_diff_1) do |value|
value.last[:postfix] if value
end

Expand All @@ -244,7 +242,7 @@ def inspect
# [:primary, '?'] => [:opt, :primary]
#
# [8] postfix ::= primary POSTFIX?
production(:postfix) do |data, value|
production(:postfix) do |value|
# Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
case value.last[:_postfix_1]
when "*" then [:star, value.first[:primary]]
Expand All @@ -270,14 +268,14 @@ def inspect
# | STRING1
# | STRING2
# | '(' expression ')'
production(:primary) do |data, value|
production(:primary) do |value|
Array(value).length > 2 ? value[1][:expression] : value
end

# Production for end of pass non-terminal.
#
# [10] pass ::= '@pass' expression
production(:pass) do |data, value, callback|
production(:pass) do |value, data, callback|
# Invoke callback
callback.call(:pass, value.last[:expression])
end
Expand Down
34 changes: 18 additions & 16 deletions lib/ebnf/peg/parser.rb
Expand Up @@ -24,6 +24,8 @@ def terminal_regexps; (@terminal_regexps ||= {}); end
# to a previous production. Block is called in an evaluation block from
# the enclosing parser.
#
# If no block is provided, then the value which would have been passed to the block is used as the result directly.
#
# @param [Symbol] term
# The terminal name.
# @param [Regexp] regexp (nil)
Expand All @@ -36,17 +38,17 @@ def terminal_regexps; (@terminal_regexps ||= {}); end
# their canonical value
# @option options [Boolean] :unescape
# Cause strings and codepoints to be unescaped.
# @yield [term, value]
# @yieldparam [Symbol] term
# A symbol indicating the production which referenced this terminal
# @yield [value, prod]
# @yieldparam [String] value
# The scanned terminal value.
# @yieldparam [Symbol] prod
# A symbol indicating the production which referenced this terminal
# @yieldparam [Proc] block
# Block passed to initialization for yielding to calling parser.
# Should conform to the yield specs for #initialize
def terminal(term, regexp = nil, **options, &block)
terminal_regexps[term] = regexp if regexp
terminal_handlers[term] = block
terminal_handlers[term] = block if block_given?
end

##
Expand Down Expand Up @@ -79,13 +81,13 @@ def start_production(term, &block)
#
# @param [Symbol] term
# Term which is a key in the branch table
# @yield [data, result, block]
# @yield [result, data, block]
# @yieldparam [Object] result
# The result from successfully parsing the production.
# @yieldparam [Hash] data
# A Hash defined for the current production, during :start
# may be initialized with data to pass to further productions,
# during :finish, it contains data placed by earlier productions
# @yieldparam [Object] result
# The result from successfully parsing the production.
# @yieldparam [Proc] block
# Block passed to initialization for yielding to calling parser.
# Should conform to the yield specs for #initialize
Expand Down Expand Up @@ -336,7 +338,7 @@ def onFinish(result, scanner: nil)
data = @prod_data.pop
result = begin
self.class.eval_with_binding(self) {
handler.call(data, result, @parse_callback)
handler.call(result, data, @parse_callback)
}
rescue ArgumentError, Error => e
error("finish", "#{e.class}: #{e.message}", production: prod)
Expand All @@ -351,23 +353,23 @@ def onFinish(result, scanner: nil)
# A terminal with a defined handler
#
# @param [Symbol] prod from the symbol of the associated rule
# @param [String] token the scanned string
# @param [String] value the scanned string
# @return [String, Object] either the result from the handler, or the scanned value unchanged
def onTerminal(prod, token, scanner: nil)
def onTerminal(prod, value, scanner: nil)
parentProd = @productions.last
handler = self.class.terminal_handlers[prod]
if handler && token != :unmatched
token = begin
if handler && value != :unmatched
value = begin
self.class.eval_with_binding(self) {
handler.call(parentProd, token, @parse_callback)
handler.call(value, parentProd, @parse_callback)
}
rescue ArgumentError, Error => e
error("terminal", "#{e.class}: #{e.message}", token: token, production: prod)
error("terminal", "#{e.class}: #{e.message}", value: value, production: prod)
@recovering = false
end
end
progress("#{prod}(:terminal)", "", depth: (depth + 2)) {"#{prod}: #{token.inspect}, lineno: #{scanner ? scanner.lineno : '?'}, pos: #{scanner ? scanner.pos : '?'}"}
token
progress("#{prod}(:terminal)", "", depth: (depth + 2)) {"#{prod}: #{value.inspect}, lineno: #{scanner ? scanner.lineno : '?'}, pos: #{scanner ? scanner.pos : '?'}"}
value
end

##
Expand Down

0 comments on commit 7afc9d0

Please sign in to comment.