Output rules as a formatted S-Expression
diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb
index bae601f..2f28b89 100644
--- a/examples/ebnf-peg-parser/meta.rb
+++ b/examples/ebnf-peg-parser/meta.rb
@@ -16,14 +16,14 @@ module EBNFPegMeta
EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :_LHS_4, "]", :_LHS_5], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_LHS_4, "11.4", [:plus, :SYMBOL], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_LHS_5, "11.5", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
@@ -36,63 +36,55 @@ module EBNFPegMeta
EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_ENUM_1, "14.1", [:alt, :_ENUM_2, :_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_ENUM_2, "14.2", [:seq, "[", :_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_ENUM_3, "14.3", [:seq, :_ENUM_5, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:O_ENUM, "15", [:alt, :_O_ENUM_1, :_O_ENUM_2], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_ENUM_1, "15.1", [:seq, "[^", :_O_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:RANGE, "16", [:alt, :_RANGE_1, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_RANGE_1, "16.1", [:seq, "[", :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_RANGE_4, "16.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:O_RANGE, "17", [:alt, :_O_RANGE_1, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_RANGE_1, "17.1", [:seq, "[^", :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :_O_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_O_RANGE_4, "17.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:STRING2, "19", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_STRING2_1, "19.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_STRING2_2, "19.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:CHAR, "20", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_CHAR_1, "20.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x00-#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#x0A#x0Dx"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_12, "23.12", [:opt, :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_14, "23.14", [:seq, "*", :_PASS_15], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_15, "23.15", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_13, "23.13", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_5, "23.5", [:seq, "(*", :_PASS_16, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_16, "23.16", [:star, :_PASS_17], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_17, "23.17", [:alt, :_PASS_18, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_18, "23.18", [:opt, :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
- EBNF::Rule.new(nil, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
]
end
diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb
index 94606ec..0088051 100644
--- a/examples/ebnf-peg-parser/parser.rb
+++ b/examples/ebnf-peg-parser/parser.rb
@@ -12,23 +12,6 @@ class EBNFPegParser
include EBNF::PEG::Parser
include EBNF::Terminals
- class ProdResult
- attr_accessor :prod
- attr_accessor :values
-
- def initialize(prod, *values)
- @prod, @values = prod, values
- end
-
- def to_ary
- values.map {|v| v.respond_to?(:to_ary) ? v.to_ary : v}.unshift(@prod)
- end
-
- def inspect
- "(#{prod} #{values.map(&:inspect).join(' ')})"
- end
- end
-
# Abstract syntax tree from parse
#
# @return [Array]
@@ -51,7 +34,7 @@ def inspect
#
# [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::='
terminal(:LHS, LHS) do |value, prod|
- value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first
+ value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first
end
# Match `SYMBOL` terminal
@@ -64,46 +47,34 @@ def inspect
# Match `HEX` terminal
#
# [13] HEX ::= #x' ([a-f] | [A-F] | [0-9])+
- terminal(:HEX, HEX)
-
- # Terminal for `ENUM` is matched as part of a `primary` rule.
- #
- # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS
- terminal(:ENUM, ENUM) do |value|
- [:range, value[1..-2]]
- end
-
- # Terminal for `O_ENUM` is matched as part of a `primary` rule.
- #
- # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']'
- terminal(:O_ENUM, O_ENUM) do |value|
- [:range, value[1..-2]]
+ terminal(:HEX, HEX) do |value|
+ [:hex, value]
end
# Terminal for `RANGE` is matched as part of a `primary` rule.
#
- # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+ # [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' - LHS
terminal(:RANGE, RANGE) do |value|
[:range, value[1..-2]]
end
# Terminal for `O_RANGE` is matched as part of a `primary` rule.
#
- # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+ # [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']'
terminal(:O_RANGE, O_RANGE) do |value|
[:range, value[1..-2]]
end
# Match double quote string
#
- # [18] STRING1 ::= '"' (CHAR - '"')* '"'
+ # [16] STRING1 ::= '"' (CHAR - '"')* '"'
terminal(:STRING1, STRING1) do |value|
value[1..-2]
end
# Match single quote string
#
- # [19] STRING2 ::= "'" (CHAR - "'")* "'"
+ # [17] STRING2 ::= "'" (CHAR - "'")* "'"
terminal(:STRING2, STRING2) do |value|
value[1..-2]
end
@@ -112,7 +83,7 @@ def inspect
# Match `POSTFIX` terminal
#
- # [22] POSTFIX ::= [?*+]
+ # [20] POSTFIX ::= [?*+]
terminal(:POSTFIX, POSTFIX)
# The `PASS` productions is not used explicitly
@@ -142,24 +113,25 @@ def inspect
production(:declaration, clear_packrat: true) do |value, data, callback|
# value contains a declaration.
# Invoke callback
- callback.call(:terminal) if value == '@terminals'
+ callback.call(:terminals) if value == '@terminals'
nil
end
# Production for end of `rule` non-terminal.
#
- # The `value` parameter, is of the form `[{LHS: "v"}, {expression: "v"}]`.
+ # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`.
#
# Clears the packrat parser when called.
#
# Create rule from expression value and pass to callback
#
# [3] rule ::= LHS expression
+ start_production(:rule, as_hash: true)
production(:rule, clear_packrat: true) do |value, data, callback|
# value contains an expression.
# Invoke callback
- id, sym = value.first[:LHS]
- expression = value.last[:expression]
+ id, sym = value[:LHS]
+ expression = value[:expression]
callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
nil
end
@@ -180,7 +152,7 @@ def inspect
# Production for end of `alt` non-terminal.
# Passes through the optimized value of the seq production as follows:
#
- # The `value` parameter, is of the form `[{seq: "v"}, {_alt_1: "v"}]`.
+ # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`.
#
# [:seq foo] => foo
# [:seq foo bar] => [:seq foo bar]
@@ -188,11 +160,12 @@ def inspect
# Note that this also may just pass through from `_alt_1`
#
# [5] alt ::= seq ('|' seq)*
+ start_production(:alt, as_hash: true)
production(:alt) do |value|
- if value.last[:_alt_1].length > 0
- [:alt, value.first[:seq]] + value.last[:_alt_1]
+ if value[:_alt_1].length > 0
+ [:alt, value[:seq]] + value[:_alt_1]
else
- value.first[:seq]
+ value[:seq]
end
end
@@ -223,14 +196,15 @@ def inspect
# `Diff` production returns concatenated postfix values
#
- # The `value` parameter, is of the form `[{postfix: "v"}, {_diff_1: "v"}]`.
+ # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`.
#
# [7] diff ::= postfix ('-' postfix)?
+ start_production(:diff, as_hash: true)
production(:diff) do |value|
- if value.last[:_diff_1]
- [:diff, value.first[:postfix], value.last[:_diff_1]]
+ if value[:_diff_1]
+ [:diff, value[:postfix], value[:_diff_1]]
else
- value.first[:postfix]
+ value[:postfix]
end
end
@@ -241,7 +215,7 @@ def inspect
# Production for end of `postfix` non-terminal.
# Either returns the `primary` production value, or as modified by the `postfix`.
#
- # The `value` parameter, is of the form `[{primary: "v"}, {_postfix_1: "v"}]`.
+ # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`.
#
# [:primary] => [:primary]
# [:primary, '*'] => [:star, :primary]
@@ -249,13 +223,14 @@ def inspect
# [:primary, '?'] => [:opt, :primary]
#
# [8] postfix ::= primary POSTFIX?
+ start_production(:postfix, as_hash: true)
production(:postfix) do |value|
# Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
- case value.last[:_postfix_1]
- when "*" then [:star, value.first[:primary]]
- when "+" then [:plus, value.first[:primary]]
- when "?" then [:opt, value.first[:primary]]
- else value.first[:primary]
+ case value[:_postfix_1]
+ when "*" then [:star, value[:primary]]
+ when "+" then [:plus, value[:primary]]
+ when "?" then [:opt, value[:primary]]
+ else value[:primary]
end
end
@@ -314,11 +289,11 @@ def initialize(input, **options, &block)
**options
) do |context, *data|
rule = case context
- when :terminal
+ when :terminals
# After parsing `@terminals`
# This changes the state of the parser to treat subsequent rules as terminals.
parsing_terminals = true
- rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminal)
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
when :pass
# After parsing `@pass`
# This defines a specific rule for whitespace.
diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md
new file mode 100644
index 0000000..957fb6e
--- /dev/null
+++ b/examples/isoebnf/README.md
@@ -0,0 +1,136 @@
+# ISO EBNF Parser example
+
+This example implements an [ISO/IEC 14977][] parser which parses compatible grammars into [S-Expressions][S-Expression]. This allows the resulting [S-Expressions][S-Expression] to drive a [PEG][]/[Packrat][] Parser to parse documents defined using [ISO/IEC 14977][].
+
+## Parsing the Grammar
+
+ require 'ebnf'
+
+ ebnf = ISOEBNFPegParser.new(File.open("examples/ebnf.isoebnf"))
+
+Output rules and terminals as [S-Expressions][S-Expression]:
+
+ puts ebnf.to_sxp
+
+This generates an [S-Expression][] form of the grammar suitable for use by {EBNF}.
+
+ (
+ (rule syntax (star syntax_rule))
+ (rule syntax_rule
+ (seq meta_identifier defining_symbol definitions_list terminator_symbol))
+ (rule definitions_list
+ (seq single_definition (star (seq definition_separator_symbol definitions_list))))
+ (rule single_definition (seq term (star (seq "," term))))
+ (rule term (seq factor (opt (seq "-" exception))))
+ (rule exception (seq factor))
+ (rule factor (seq (opt (seq integer "*")) primary))
+ (rule primary
+ (alt optional_sequence repeated_sequence special_sequence grouped_sequence
+ meta_identifier terminal_string empty ))
+ (rule optional_sequence
+ (seq start_option_symbol definitions_list end_option_symbol))
+ (rule repeated_sequence
+ (seq start_repeat_symbol definitions_list end_repeat_symbol))
+ (rule grouped_sequence (seq "(" definitions_list ")"))
+ (rule letter
+ (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"
+ "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k"
+ "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" ))
+ (rule decimal_digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule integer (seq decimal_digit (star decimal_digit)))
+ (rule meta_identifier (seq letter (star meta_identifier_character)))
+ (rule meta_identifier_character (alt letter decimal_digit "_"))
+ (rule terminal_string
+ (alt
+ (seq (seq "'" first_terminal_character (star first_terminal_character) "'"))
+ (seq (seq "\"" second_terminal_character (star second_terminal_character) "\""))) )
+ (rule first_terminal_character (seq terminal_character))
+ (rule second_terminal_character (seq terminal_character))
+ (rule special_sequence (seq "?" (star special_sequence_character) "?"))
+ (rule special_sequence_character (seq terminal_character))
+ (rule terminal_character
+ (alt letter decimal_digit concatenate_symbol defining_symbol
+ definition_separator_symbol end_comment_symbol end_group_symbol
+ end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
+ repetition_symbol second_quote_symbol special_sequence_symbol
+ start_comment_symbol start_group_symbol start_option_symbol
+ start_repeat_symbol terminator_symbol other_character ))
+ (rule other_character
+ (alt " " ":" "+" "_" "%" "@" "&" "#" "$" "<" ">" "\\" "^" "`" "~"))
+ (rule empty (seq ""))
+ (rule defining_symbol (alt "=" ":"))
+ (rule definition_separator_symbol (alt "|" "/" "!"))
+ (rule terminator_symbol (alt ";" "."))
+ (rule start_option_symbol (alt "[" "(/"))
+ (rule end_option_symbol (alt "]" "/)"))
+ (rule start_repeat_symbol (alt "{" "(:"))
+ (rule end_repeat_symbol (alt "}" ":)")))
+
+This can then be used as input to {EBNF.parse} to transform [EBNF][] to [PEG][] for parsing examples of the grammar using {EBNF::PEG::Parser}.
+
+ ebnf --input-format sxp --peg ebnf.sxp -o ebnf.peg.sxp
+
+Note, however, that [ISO EBNF][ISO/IEC 14977] doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the W3C [EBNF][] {file:iso-ebnf.ebnf definition of the grammar} does use terminal rules.
+
+When parsing files with this grammar, rules that are all capitalized _will_ be treated as terminal productions, although this is a proprietary interpretation of the specification.
+
+## Example Walkthrough
+
+This example uses the [EBNF][] grammar from {file:iso-ebnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar.
+
+The first step is defining regular expressions for terminals used within the grammar. Note that the parser can operate without terminal definitions, but this can greatly improve parser performance.
+
+The {file:parser} is implemented using the {ISOEBNFPegParser} class, which includes {EBNF::PEG::Parser}.
+
+### Parser basics
+The parser operates directly using the rules from the abstract syntax tree generated by transforming the original [ISO EBNF][ISO/IEC 14977] grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions).
+
+The parser starts with the specified rule, `syntax` in this case, and executes that rule, which is expected to completely parse the input file, potentially leaving some whitespace.
+
+Non-terminal rules have an expression using one of the following:
+
+`seq`
+: A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
+`opt`
+: An optional rule or terminal. It either results in the matching rule or returns `nil`.
+`alt`
+: A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
+`plus`
+: A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
+`rept m n`
+: A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
+`star`
+: A sequence of zero or more of the matching rule. It will always return an array.
+
+The starting rule will typically be of the form `(star sub_rule)` which will attempt to parse that sub rule until the end of input.
+
+If a rule matches, it enters a _production_, which may invoke a _start production_ before matching is attempted, and will call any _production_ whether matched or unmatched. That _production_ may choose to evaluate the returned abstract syntax tree to simplify the result, or create some semantic representation of that value.
+
+Due to the nature of [PEG][] parsers, the same rule may be attempted at the same input location many times; this is optimized by use of a [Packrat][] memoizing cache, which remembers the result of a previous successful evaluation and short-circuits further execution.
+
+Processing continues by looking for a sequence of productions and pushing those productions onto the stack. When a production is complete, any associated _production handler_ is invoked, after popping off the top of the `prod_data` stack. The just removed hash is passed as `current` to the _production handler_. This is typically where the work of the parser happens. See [Production definitions](#Production_definitions) for more information.
+
+### Terminal definitions
+The {file:parser} uses a DSL to specify `terminals` and `productions` associated with rules in the grammar. Each `terminal` specifies the rule name, associated regular expression, and a block which is invoked when the parser recognizes the terminal:
+
+ terminal(:integer, /\d+/) do |value, prod|
+ value.to_i
+ end
+
+In this terminal definition, the `integer` terminal is recognized using the regular expression `/\d+/`. When found, the value of the integer is returned for use by productions which include it.
+
+### Production definitions
+Looking at the grammar itself, we can see that the first declaration is
+
+ [1] syntax ::= syntax_rule*
+
+[Ruby]: https://ruby-lang.org/
+[YARD]: https://yardoc.org/
+[YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md
+[PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html
+[EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation
+[EBNF doc]: https://rubydoc.info/github/dryruby/ebnf/
+[Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/
+[PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar
+[ISO/IEC 14977]:https://www.iso.org/standard/26153.html
+[S-expression]: https://en.wikipedia.org/wiki/S-expression
diff --git a/examples/isoebnf/Rakefile b/examples/isoebnf/Rakefile
new file mode 100644
index 0000000..ee6b779
--- /dev/null
+++ b/examples/isoebnf/Rakefile
@@ -0,0 +1,32 @@
+task default: ['iso-ebnf.sxp', 'iso-ebnf.peg.sxp', :meta, :doc]
+
+desc 'Build rules table'
+task meta: "meta.rb"
+
+file "meta.rb" => "../../etc/iso-ebnf.ebnf" do |t|
+ sh %{
+ ebnf --peg --format rb \
+ --mod-name ISOEBNFMeta \
+ --output meta.rb \
+ #{t.prerequisites.first}
+ }
+end
+
+file 'iso-ebnf.sxp' => "../../etc/iso-ebnf.ebnf" do |t|
+ sh %{
+ ebnf --output iso-ebnf.sxp #{t.prerequisites.first}
+ }
+end
+
+file 'iso-ebnf.peg.sxp' => "../../etc/iso-ebnf.ebnf" do |t|
+ sh %{
+ ebnf --peg --output iso-ebnf.peg.sxp #{t.prerequisites.first}
+ }
+end
+
+desc "Generate literal documentation for parser"
+task doc: %w(doc/parser.html)
+
+file "doc/parser.html" => "parser.rb" do
+ `rocco -t doc/layout.mustache parser.rb -o doc`
+end
diff --git a/examples/isoebnf/doc/layout.mustache b/examples/isoebnf/doc/layout.mustache
new file mode 100644
index 0000000..c62137d
--- /dev/null
+++ b/examples/isoebnf/doc/layout.mustache
@@ -0,0 +1,491 @@
+
+
+
+
+ {{ title }}
+
+
+
+
+
+ {{#sources?}}
+
+ {{/sources?}}
+
+
+
+ {{ title }} |
+ |
+
+
+
+ {{#sections}}
+
+
+
+ {{{ docs }}}
+ |
+
+
+ |
+
+ {{/sections}}
+
+
+
diff --git a/examples/isoebnf/doc/parser.html b/examples/isoebnf/doc/parser.html
new file mode 100644
index 0000000..1186a99
--- /dev/null
+++ b/examples/isoebnf/doc/parser.html
@@ -0,0 +1,999 @@
+
+
+
+
+ parser.rb
+
+
+
+
+
+
+
+
+ parser.rb |
+ |
+
+
+
+
+
+
+ EBNF Parser for ISO EBNF.
+
+Produces an Abstract Syntax Tree in S-Expression form for the input grammar file.
+ |
+
+ require 'ebnf'
+require 'ebnf/terminals'
+require 'ebnf/peg/parser'
+require 'meta'
+require 'sxp'
+require 'logger'
+
+class ISOEBNFPegParser
+ include EBNF::PEG::Parser
+ |
+
+
+
+
+ The base for terminal-character, which omits "'", '"', and '?'.
+Could be more optimized, and one might quibble
+with the overly strictly defined character set,
+but it is correct.
+ |
+
+ TERMINAL_CHARACTER_BASE = %r{
+ [a-zA-Z0-9] | # letter | decimal digit
+ , | # concatenate symbol
+ = | # defining symbol
+ [\|\/!] | # definition separator symbol
+ \*\) | # end comment symbol
+ \) | # end group symbol
+ \] | # end option symbol
+ \} | # end repeat symbol
+ \- | # except symbol
+
+# DIVIDER
+
+ \* | # repetition symbol
+
+# DIVIDER
+
+ \(\* | # start comment symbol
+ \( | # start group symbol
+ \[ | # start option symbol
+ \{ | # start repeat symbol
+ [;\.] | # terminator symbol
+ [:+_%@&$<>^\x20\x23\\`~] # other character
+ }x
+
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
+ |
+
+
+
+
+ \' | # first quote symbol
+ |
+
+
+ |
+
+
+
+
+ \" | # second quote symbol
+\? | # special sequence symbol
+ |
+
+ terminal(:integer, /\d+/) do |value, prod|
+ value.to_i
+ end
+ |
+
+
+
+
+ Abstract syntax tree from parse
+
+@return [ArrayEBNF::Rule]
+ |
+
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
+ value.to_sym
+ end
+ |
+
+
+
+
+ [14] integer ::= decimal_digit+
+ |
+
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
+ value[1..-2]
+ end
+ |
+
+
+
+
+ [15] meta_identifier ::= letter meta_identifier_character*
+ |
+
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
+ |
+
+
+
+
+ [17] terminal_string ::= ("'" first_terminal_character+ "'")
+| ('"' second_terminal_character+ '"')
+ |
+
+ terminal(:terminal_character, TERMINAL_CHARACTER)
+ |
+
+
+
+
+ [20] special_sequence ::= '?' special_sequence_character* '?'
+ |
+
+ terminal(:empty, //)
+
+
+# DIVIDER
+
+ terminal(:definition_separator_symbol, /[\|\/!]/)
+ |
+
+
+
+
+ [22] terminal_character ::= [a-zA-Z0-9]
+| [,=;*}#x2d?([{;]
+| '*)'
+| '(*'
+| ']'
+| other_character
+ |
+
+ terminal(:terminator_symbol, /[;\.]/)
+ |
+
+
+
+
+ [25] empty ::= ''
+ |
+
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
+ |
+
+
+
+
+ [26] definition_separator_symbol ::= '|' | '/' | '!'
+ |
+
+ terminal(:end_option_symbol, /\]/)
+ |
+
+
+
+
+ [27] terminator_symbol ::= ';' | '.'
+ |
+
+ terminal(:start_repeat_symbol, /{|\(:/)
+ |
+
+
+
+
+ [28] start_option_symbol ::= '['
+ |
+
+ terminal(:end_repeat_symbol, /}|:\)/)
+ |
+
+
+
+
+ [29] end_option_symbol ::= ']'
+ |
+
+
+ |
+
+
+
+
+ [30] start_repeat_symbol ::= '{' | '(:'
+ |
+
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
+ |
+
+
+
+
+ [31] end_repeat_symbol ::= '}' | ':)'
+ |
+
+ sym = value[0][:meta_identifier]
+ definitions_list = value[2][:definitions_list]
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
+ nil
+ end
+ |
+
+
+
+
+ Non-terminal productions
+ |
+
+ start_production(:definitions_list, as_hash: true)
+ production(:definitions_list) do |value|
+ if value[:_definitions_list_1].length > 0
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
+ else
+ value[:single_definition]
+ end
+ end
+ production(:_definitions_list_1) do |value|
+ Array(value.first)
+ end
+ start_production(:_definitions_list_2, as_hash: true)
+ production(:_definitions_list_2) do |value|
+ if Array(value[:definitions_list]).first == :alt
+ value[:definitions_list][1..-1]
+ else
+ [value[:definitions_list]]
+ end
+ end
+ |
+
+
+
+
+ [2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol
+ |
+
+ start_production(:single_definition, as_hash: true)
+ production(:single_definition) do |value|
+ if value[:_single_definition_1].length > 0
+ [:seq, value[:term]] + value[:_single_definition_1]
+ else
+ value[:term]
+ end
+ end
+ production(:_single_definition_1) do |value|
+ value.map {|a1| a1.last[:term]}.compact # Get rid of '|'
+ end
+ |
+
+
+
+
+ value contains an expression.
+Invoke callback
+ |
+
+ start_production(:term, as_hash: true)
+ production(:term) do |value|
+ if value[:_term_1]
+ [:diff, value[:factor], value[:_term_1]]
+ else
+ value[:factor]
+ end
+ end
+ production(:_term_1) do |value|
+ value.last[:exception] if value
+ end
+ |
+
+
+
+
+ Setting as_hash: true in the start production makes the value of the form of a hash, rather than an array of hashes.
+
+[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*
+ |
+
+ start_production(:exception, as_hash: true)
+ production(:exception) do |value|
+ value[:factor]
+ end
+ |
+
+
+
+
+ [4] single_definition ::= term (',' term)*
+ |
+
+ start_production(:factor, as_hash: true)
+ production(:factor) do |value|
+ if value[:_factor_1]
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
+ else
+ value[:primary]
+ end
+ end
+ production(:_factor_2) do |value|
+ value.first[:integer]
+ end
+ |
+
+
+
+
+ [5] term ::= factor ('-' exception)?
+ |
+
+ production(:optional_sequence) do |value|
+ [:opt, value[1][:definitions_list]]
+ end
+ |
+
+
+
+
+ [6] exception ::= factor
+ |
+
+ production(:repeated_sequence) do |value|
+ [:star, value[1][:definitions_list]]
+ end
+ |
+
+
+
+
+ [7] factor ::= (integer '*')? primary
+ |
+
+ production(:grouped_sequence) do |value|
+ [:seq, value[1][:definitions_list]]
+ end
+ |
+
+
+
+
+ [9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol
+ |
+
+ def initialize(input, **options, &block)
+ |
+
+
+
+
+ [10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol
+ |
+
+ if options.has_key?(:level)
+ options[:logger] = Logger.new(STDERR)
+ options[:logger].level = options[:level]
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+ end
+ |
+
+
+
+
+ [11] grouped_sequence ::= '(' definitions_list ')'
+ |
+
+ @input = input.respond_to?(:read) ? input.read : input.to_s
+
+ parsing_terminals = false
+ @ast = []
+ parse(@input,
+ :syntax,
+ ISOEBNFMeta::RULES,
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
+ **options
+ ) do |context, *data|
+ rule = case context
+ when :rule
+ |
+
+
+
+
+ Parser invocation.
+
+On start, yield ourselves if a block is given, otherwise, return this parser instance
+
+@param [#read, #to_s] input
+@param [Hash{Symbol => Object}] options
+@option options [Boolean] :level
+ Trace level. 0(debug), 1(info), 2(warn), 3(error).
+@return [EBNFParser]
+ |
+
+ rule = data.first
+ rule.kind = :terminal if parsing_terminals
+ rule
+ end
+ @ast << rule if rule
+ end
+ @ast
+ end
+ |
+
+
+
+
+ If the level option is set, instantiate a logger for collecting trace information.
+ |
+
+ def to_sxp
+ require 'sxp' unless defined?(SXP)
+ |
+
+
+
+
+ Read input, if necessary, which will be used in a Scanner.
+ |
+
+ SXP::Generator.string(@ast.map(&:for_sxp))
+ end
+end
+ |
+
+
+
+
+ A rule which has already been turned into a Rule object.
+ |
+
+
+ |
+
+
+
+
+ Output formatted S-Expression of grammar
+ |
+
+
+ |
+
+
+
+
+ Output rules as a formatted S-Expression
+
+ |
+
+
+ |
+
+
+
+
diff --git a/examples/isoebnf/examples/ebnf.isoebnf b/examples/isoebnf/examples/ebnf.isoebnf
new file mode 100644
index 0000000..06e2fa3
--- /dev/null
+++ b/examples/isoebnf/examples/ebnf.isoebnf
@@ -0,0 +1,28 @@
+letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
+ | "c" | "d" | "e" | "f" | "g" | "h" | "i"
+ | "j" | "k" | "l" | "m" | "n" | "o" | "p"
+ | "q" | "r" | "s" | "t" | "u" | "v" | "w"
+ | "x" | "y" | "z" ;
+digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
+ | "'" | '"' | "=" | "|" | "." | "," | ";" ;
+character = letter | digit | symbol | "_" ;
+
+identifier = letter , { letter | digit | "_" } ;
+terminal = "'" , character , { character } , "'"
+ | '"' , character , { character } , '"' ;
+
+lhs = identifier ;
+rhs = identifier
+ | terminal
+ | "[" , rhs , "]"
+ | "{" , rhs , "}"
+ | "(" , rhs , ")"
+ | rhs , "|" , rhs
+ | rhs , "," , rhs ;
+
+rule = lhs , "=" , rhs , ";" ;
+grammar = { rule } ;
diff --git a/examples/isoebnf/examples/html.isoebnf b/examples/isoebnf/examples/html.isoebnf
new file mode 100644
index 0000000..035d0b3
--- /dev/null
+++ b/examples/isoebnf/examples/html.isoebnf
@@ -0,0 +1,77 @@
+(* from https://tomassetti.me/ebnf/ *)
+htmlDocument
+ = {scriptlet | SEA_WS}, [xml], {scriptlet | SEA_WS}, [dtd], {scriptlet | SEA_WS}, {htmlElements}
+ ;
+
+htmlElements
+ : {htmlMisc}, htmlElement, {htmlMisc}
+ ;
+
+htmlElement
+ : TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE, htmlContent, TAG_OPEN, TAG_SLASH, htmlTagName, TAG_CLOSE
+ | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_SLASH_CLOSE
+ | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE
+ | scriptlet
+ | script
+ | style
+ ;
+
+htmlContent
+ : [htmlChardata], {(htmlElement | xhtmlCDATA | htmlComment), [htmlChardata]}
+ ;
+
+htmlAttribute
+ : htmlAttributeName, TAG_EQUALS, htmlAttributeValue
+ | htmlAttributeName
+ ;
+
+htmlAttributeName
+ : TAG_NAME
+ ;
+
+htmlAttributeValue
+ : ATTVALUE_VALUE
+ ;
+
+htmlTagName
+ : TAG_NAME
+ ;
+
+htmlChardata
+ : HTML_TEXT
+ | SEA_WS
+ ;
+
+htmlMisc
+ : htmlComment
+ | SEA_WS
+ ;
+
+htmlComment
+ : HTML_COMMENT
+ | HTML_CONDITIONAL_COMMENT
+ ;
+
+xhtmlCDATA
+ : CDATA
+ ;
+
+dtd
+ : DTD
+ ;
+
+xml
+ : XML_DECLARATION
+ ;
+
+scriptlet
+ : SCRIPTLET
+ ;
+
+script
+ : SCRIPT_OPEN, ( SCRIPT_BODY | SCRIPT_SHORT_BODY)
+ ;
+
+style
+ : STYLE_OPEN, ( STYLE_BODY | STYLE_SHORT_BODY)
+ ;
\ No newline at end of file
diff --git a/examples/isoebnf/examples/pascal.isoebnf b/examples/isoebnf/examples/pascal.isoebnf
new file mode 100644
index 0000000..acd114b
--- /dev/null
+++ b/examples/isoebnf/examples/pascal.isoebnf
@@ -0,0 +1,17 @@
+ (* a simple program syntax in EBNF − Wikipedia *)
+ program = 'PROGRAM', white_space, identifier, white_space,
+ 'BEGIN', white_space,
+ { assignment, ";", white_space },
+ 'END.' ;
+ identifier = alphabetic_character, { alphabetic_character | digit } ;
+ number = [ "-" ], digit, { digit } ;
+ string = '"' , { all_characters - '"' }, '"' ;
+ assignment = identifier , ":=" , ( number | identifier | string ) ;
+ alphabetic_character = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" ;
+ digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ white_space = ? white_space characters ? ;
+ all_characters = ? all visible characters ? ;
+
\ No newline at end of file
diff --git a/examples/isoebnf/examples/postal-address.isoebnf b/examples/isoebnf/examples/postal-address.isoebnf
new file mode 100644
index 0000000..2c36d5b
--- /dev/null
+++ b/examples/isoebnf/examples/postal-address.isoebnf
@@ -0,0 +1,29 @@
+postal_address = name_part, street, zip_part ;
+
+name_part = {personal_part, SP}, last_name, [SP, suffix], CRLF
+ | personal_part, CRLF
+ ;
+
+personal_part = first_name | (initial, ".") ;
+first_name = {ALPHA} ;
+initial = ALPHA ;
+last_name = {ALPHA} ;
+suffix = ("Jr." | "Sr." | ("I" | "V" | "X"), {"I" | "V" | "X"}) ;
+
+street = [apt, SP], house_num, SP, street_name, CRLF ;
+apt = DIGIT, [DIGIT, [DIGIT, [DIGIT]]] ;
+house_num = (DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA),
+ [(DIGIT | ALPHA)]]]]]]]
+ ;
+street_name = VCHAR, {VCHAR} ;
+
+zip_part = town_name, ",", SP, state, SP, [SP], zip_code, CRLF ;
+town_name = (ALPHA | SP), {ALPHA | SP} ;
+state = 2*ALPHA ;
+zip_code = 5*DIGIT, ["-", 4*DIGIT] ;
\ No newline at end of file
diff --git a/examples/isoebnf/iso-ebnf.peg.sxp b/examples/isoebnf/iso-ebnf.peg.sxp
new file mode 100644
index 0000000..e82bc11
--- /dev/null
+++ b/examples/isoebnf/iso-ebnf.peg.sxp
@@ -0,0 +1,81 @@
+(
+ (rule syntax (star syntax_rule))
+ (rule syntax_rule
+ (seq meta_identifier defining_symbol definitions_list terminator_symbol))
+ (rule definitions_list (seq single_definition _definitions_list_1))
+ (rule _definitions_list_1 (star _definitions_list_2))
+ (rule _definitions_list_2 (seq definition_separator_symbol definitions_list))
+ (rule single_definition (seq term _single_definition_1))
+ (rule _single_definition_1 (star _single_definition_2))
+ (rule _single_definition_2 (seq "," term))
+ (rule term (seq factor _term_1))
+ (rule _term_1 (opt _term_2))
+ (rule _term_2 (seq "-" exception))
+ (rule exception (seq factor))
+ (rule factor (seq _factor_1 primary))
+ (rule _factor_1 (opt _factor_2))
+ (rule _factor_2 (seq integer "*"))
+ (rule primary
+ (alt optional_sequence repeated_sequence special_sequence grouped_sequence
+ meta_identifier terminal_string empty ))
+ (rule optional_sequence
+ (seq start_option_symbol definitions_list end_option_symbol))
+ (rule repeated_sequence
+ (seq start_repeat_symbol definitions_list end_repeat_symbol))
+ (rule grouped_sequence (seq "(" definitions_list ")"))
+ (terminals _terminals (seq))
+ (terminal terminal_string (alt _terminal_string_1 _terminal_string_2))
+ (rule _terminal_string_1 (seq "'" _terminal_string_3 "'"))
+ (rule _terminal_string_3 (plus first_terminal_character))
+ (rule _terminal_string_2 (seq "\"" _terminal_string_4 "\""))
+ (rule _terminal_string_4 (plus second_terminal_character))
+ (terminal meta_identifier (seq letter _meta_identifier_1))
+ (rule _meta_identifier_1 (star meta_identifier_character))
+ (terminal integer (plus decimal_digit))
+ (terminal special_sequence (seq "?" _special_sequence_1 "?"))
+ (rule _special_sequence_1 (star special_sequence_character))
+ (terminal comment (seq start_comment_symbol _comment_1 end_comment_symbol))
+ (rule _comment_1 (star comment_symbol))
+ (terminal comment_symbol (alt comment commentless_symbol other_character))
+ (terminal commentless_symbol
+ (alt terminal_character meta_identifier integer terminal_string special_sequence))
+ (terminal letter (range "a-zA-Z"))
+ (terminal decimal_digit (range "0-9"))
+ (terminal meta_identifier_character (alt letter decimal_digit "_"))
+ (terminal first_terminal_character (diff terminal_character "'"))
+ (terminal second_terminal_character (diff terminal_character "\""))
+ (terminal special_sequence_character (diff terminal_character "?"))
+ (terminal terminal_character
+ (alt letter decimal_digit concatenate_symbol defining_symbol
+ definition_separator_symbol end_comment_symbol end_group_symbol
+ end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
+ repetition_symbol second_quote_symbol special_sequence_symbol
+ start_comment_symbol start_group_symbol start_option_symbol
+ start_repeat_symbol terminator_symbol other_character ))
+ (terminal other_character (alt _other_character_1 "\\"))
+ (terminal _other_character_1 (range ":+_%@&$<>^` ̃#x20#x23"))
+ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
+ (pass _pass (alt __pass_1 comment))
+ (rule __pass_1 (plus gap_separator))
+ (terminal empty (seq ""))
+ (terminal concatenate_symbol (seq ","))
+ (terminal repetition_symbol (seq "*"))
+ (terminal except_symbol (seq "-"))
+ (terminal first_quote_symbol (seq "'"))
+ (terminal second_quote_symbol (seq "\""))
+ (terminal start_comment_symbol (seq "(*"))
+ (terminal end_comment_symbol (seq "*)"))
+ (terminal start_group_symbol (seq "("))
+ (terminal end_group_symbol (seq ")"))
+ (terminal special_sequence_symbol (seq "?"))
+ (terminal defining_symbol (alt "=" ":"))
+ (terminal definition_separator_symbol (alt "|" "/" "!"))
+ (terminal terminator_symbol (alt ";" "."))
+ (terminal start_option_symbol (seq "["))
+ (terminal end_option_symbol (seq "]"))
+ (terminal start_repeat_symbol (alt "{" "(:"))
+ (terminal end_repeat_symbol (alt "}" ":)"))
+ (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string))
+ (rule _gap_free_symbol_1 (seq _gap_free_symbol_3 terminal_character))
+ (rule _gap_free_symbol_3 (not _gap_free_symbol_2))
+ (terminal _gap_free_symbol_2 (range "'\"")))
diff --git a/examples/isoebnf/iso-ebnf.sxp b/examples/isoebnf/iso-ebnf.sxp
new file mode 100644
index 0000000..7f6fe5c
--- /dev/null
+++ b/examples/isoebnf/iso-ebnf.sxp
@@ -0,0 +1,65 @@
+(
+ (rule syntax (star syntax_rule))
+ (rule syntax_rule
+ (seq meta_identifier defining_symbol definitions_list terminator_symbol))
+ (rule definitions_list
+ (seq single_definition (star (seq definition_separator_symbol definitions_list))))
+ (rule single_definition (seq term (star (seq "," term))))
+ (rule term (seq factor (opt (seq "-" exception))))
+ (rule exception (seq factor))
+ (rule factor (seq (opt (seq integer "*")) primary))
+ (rule primary
+ (alt optional_sequence repeated_sequence special_sequence grouped_sequence
+ meta_identifier terminal_string empty ))
+ (rule optional_sequence
+ (seq start_option_symbol definitions_list end_option_symbol))
+ (rule repeated_sequence
+ (seq start_repeat_symbol definitions_list end_repeat_symbol))
+ (rule grouped_sequence (seq "(" definitions_list ")"))
+ (terminals _terminals (seq))
+ (terminal terminal_string
+ (alt
+ (seq "'" (plus first_terminal_character) "'")
+ (seq "\"" (plus second_terminal_character) "\"")) )
+ (terminal meta_identifier (seq letter (star meta_identifier_character)))
+ (terminal integer (plus decimal_digit))
+ (terminal special_sequence (seq "?" (star special_sequence_character) "?"))
+ (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol))
+ (terminal comment_symbol (alt comment commentless_symbol other_character))
+ (terminal commentless_symbol
+ (alt terminal_character meta_identifier integer terminal_string special_sequence))
+ (terminal letter (range "a-zA-Z"))
+ (terminal decimal_digit (range "0-9"))
+ (terminal meta_identifier_character (alt letter decimal_digit "_"))
+ (terminal first_terminal_character (diff terminal_character "'"))
+ (terminal second_terminal_character (diff terminal_character "\""))
+ (terminal special_sequence_character (diff terminal_character "?"))
+ (terminal terminal_character
+ (alt letter decimal_digit concatenate_symbol defining_symbol
+ definition_separator_symbol end_comment_symbol end_group_symbol
+ end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
+ repetition_symbol second_quote_symbol special_sequence_symbol
+ start_comment_symbol start_group_symbol start_option_symbol
+ start_repeat_symbol terminator_symbol other_character ))
+ (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\"))
+ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
+ (pass _pass (alt (plus gap_separator) comment))
+ (terminal empty (seq ""))
+ (terminal concatenate_symbol (seq ","))
+ (terminal repetition_symbol (seq "*"))
+ (terminal except_symbol (seq "-"))
+ (terminal first_quote_symbol (seq "'"))
+ (terminal second_quote_symbol (seq "\""))
+ (terminal start_comment_symbol (seq "(*"))
+ (terminal end_comment_symbol (seq "*)"))
+ (terminal start_group_symbol (seq "("))
+ (terminal end_group_symbol (seq ")"))
+ (terminal special_sequence_symbol (seq "?"))
+ (terminal defining_symbol (alt "=" ":"))
+ (terminal definition_separator_symbol (alt "|" "/" "!"))
+ (terminal terminator_symbol (alt ";" "."))
+ (terminal start_option_symbol (seq "["))
+ (terminal end_option_symbol (seq "]"))
+ (terminal start_repeat_symbol (alt "{" "(:"))
+ (terminal end_repeat_symbol (alt "}" ":)"))
+ (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)))
diff --git a/examples/isoebnf/meta.rb b/examples/isoebnf/meta.rb
new file mode 100644
index 0000000..85b067a
--- /dev/null
+++ b/examples/isoebnf/meta.rb
@@ -0,0 +1,75 @@
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from ../../etc/iso-ebnf.ebnf
+module ISOEBNFMeta
+ RULES = [
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ ]
+end
+
diff --git a/examples/isoebnf/parse b/examples/isoebnf/parse
new file mode 100755
index 0000000..c00eae1
--- /dev/null
+++ b/examples/isoebnf/parse
@@ -0,0 +1,53 @@
+#!/usr/bin/env ruby
+# parse --- Process EBNF to generate AST S-Expression
+
+$:.unshift(File.expand_path("../../../lib", __FILE__))
+$:.unshift(File.expand_path("..", __FILE__))
+require 'rubygems'
+require 'getoptlong'
+require 'parser'
+require 'sxp'
+
+out = STDOUT
+
+OPT_ARGS = [
+ ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"],
+ ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"],
+ ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"]
+]
+def usage
+ require 'ebnf'
+ STDERR.puts %{#{$0} Version #{EBNF::VERSION}}
+ STDERR.puts %{Usage: #{$0} [options] file ...}
+ width = OPT_ARGS.map do |o|
+ l = o.first.length
+ l += o[1].length + 2 if o[1].is_a?(String)
+ l
+ end.max
+ OPT_ARGS.each do |o|
+ s = " %-*s " % [width, (o[1].is_a?(String) ? "#{o[0,2].join(', ')}" : o[0])]
+ s += o.last
+ STDERR.puts s
+ end
+ exit(1)
+end
+
+options = {}
+input = nil
+
+opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]})
+
+opts.each do |opt, arg|
+ case opt
+ when '--evaluate' then input = arg
+ when '--trace' then options[:level] = arg.to_i
+ when '--help' then usage
+ end
+end
+
+input = File.read(ARGV[0]) if ARGV[0] # read eagerly so no File handle is left open; a file argument overrides --evaluate
+
+# Parse the grammar from the file/--evaluate text, falling back to STDIN
+ebnf = ISOEBNFPegParser.new(input || STDIN, **options)
+
+puts ebnf.to_sxp
diff --git a/examples/isoebnf/parser.rb b/examples/isoebnf/parser.rb
new file mode 100644
index 0000000..3123876
--- /dev/null
+++ b/examples/isoebnf/parser.rb
@@ -0,0 +1,238 @@
+# # EBNF Parser for ISO EBNF.
+#
+# Produces an Abstract Syntax Tree in S-Expression form for the input grammar file
+require 'ebnf'
+require 'ebnf/terminals'
+require 'ebnf/peg/parser'
+require 'meta'
+require 'sxp'
+require 'logger'
+
+class ISOEBNFPegParser
+ include EBNF::PEG::Parser
+
+ # The base for terminal-character, which omits "'", '"', and '?'.
+ # Could be more optimized, and one might quibble
+ # with the overly-strictly defined character set,
+ # but it is correct.
+ TERMINAL_CHARACTER_BASE = %r{
+ [a-zA-Z0-9] | # letter | decimal digit
+ , | # concatenate symbol
+ = | # defining symbol
+ [\|\/!] | # definition separator symbol
+ \*\) | # end comment symbol
+ \) | # end group symbol
+ \] | # end option symbol
+ \} | # end repeat symbol
+ \- | # except symbol
+ #\' | # first quote symbol
+ \* | # repetition symbol
+ #\" | # second quote symbol
+ #\? | # special sequence symbol
+ \(\* | # start comment symbol
+ \( | # start group symbol
+ \[ | # start option symbol
+ \{ | # start repeat symbol
+ [;\.] | # terminator symbol
+ [:+_%@&$<>^\x20\x23\\`~] # other character
+ }x
+
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
+
+ # Abstract syntax tree from parse
+ #
+ # @return [Array]
+ attr_reader :ast
+
+ # `[14] integer ::= decimal_digit+`
+ terminal(:integer, /\d+/) do |value, prod|
+ value.to_i
+ end
+
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
+ value.to_sym
+ end
+
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
+ # ` | ('"' second_terminal_character+ '"')`
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
+ value[1..-2]
+ end
+
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'` (the body may be empty, e.g. `??`)
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}*\?/)
+
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
+ # ` | [,=;*}#x2d?([{;]`
+ # ` | '*)'`
+ # ` | '(*'`
+ # ` | ']'`
+ # ` | other_character`
+ terminal(:terminal_character, TERMINAL_CHARACTER)
+
+ # `[25] empty ::= ''`
+ terminal(:empty, //)
+
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
+ terminal(:definition_separator_symbol, /[\|\/!]/)
+
+ # `[27] terminator_symbol ::= ';' | '.'`
+ terminal(:terminator_symbol, /[;\.]/)
+
+ # `[28] start_option_symbol ::= '[' | '(/'`
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
+
+ # `[29] end_option_symbol ::= ']'`
+ terminal(:end_option_symbol, /\]/)
+
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
+ terminal(:start_repeat_symbol, /{|\(:/)
+
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
+ terminal(:end_repeat_symbol, /}|:\)/)
+
+ # ## Non-terminal productions
+
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
+ # value contains an expression.
+ # Invoke callback
+ sym = value[0][:meta_identifier]
+ definitions_list = value[2][:definitions_list]
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
+ nil
+ end
+
+ # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
+ #
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
+ start_production(:definitions_list, as_hash: true)
+ production(:definitions_list) do |value|
+ if value[:_definitions_list_1].length > 0
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
+ else
+ value[:single_definition]
+ end
+ end
+ production(:_definitions_list_1) do |value|
+ Array(value.first)
+ end
+ start_production(:_definitions_list_2, as_hash: true)
+ production(:_definitions_list_2) do |value|
+ if Array(value[:definitions_list]).first == :alt
+ value[:definitions_list][1..-1]
+ else
+ [value[:definitions_list]]
+ end
+ end
+
+ # `[4] single_definition ::= term (',' term)*`
+ start_production(:single_definition, as_hash: true)
+ production(:single_definition) do |value|
+ if value[:_single_definition_1].length > 0
+ [:seq, value[:term]] + value[:_single_definition_1]
+ else
+ value[:term]
+ end
+ end
+ production(:_single_definition_1) do |value|
+ value.map {|a1| a1.last[:term]}.compact # Get rid of '|'
+ end
+
+ # `[5] term ::= factor ('-' exception)?`
+ start_production(:term, as_hash: true)
+ production(:term) do |value|
+ if value[:_term_1]
+ [:diff, value[:factor], value[:_term_1]]
+ else
+ value[:factor]
+ end
+ end
+ production(:_term_1) do |value|
+ value.last[:exception] if value
+ end
+
+ # `[6] exception ::= factor`
+ start_production(:exception, as_hash: true)
+ production(:exception) do |value|
+ value[:factor]
+ end
+
+ # `[7] factor ::= (integer '*')? primary`
+ start_production(:factor, as_hash: true)
+ production(:factor) do |value|
+ if value[:_factor_1]
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
+ else
+ value[:primary]
+ end
+ end
+ production(:_factor_2) do |value|
+ value.first[:integer]
+ end
+
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
+ production(:optional_sequence) do |value|
+ [:opt, value[1][:definitions_list]]
+ end
+
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
+ production(:repeated_sequence) do |value|
+ [:star, value[1][:definitions_list]]
+ end
+
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
+ production(:grouped_sequence) do |value|
+ [:seq, value[1][:definitions_list]]
+ end
+
+ # ## Parser invocation.
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
+ #
+ # @param [#read, #to_s] input
+ # @param [Hash{Symbol => Object}] options
+ # @option options [Boolean] :level
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
+ # @return [EBNFParser]
+ def initialize(input, **options, &block)
+ # If the `level` option is set, instantiate a logger for collecting trace information.
+ if options.has_key?(:level)
+ options[:logger] = Logger.new(STDERR)
+ options[:logger].level = options[:level]
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+ end
+
+ # Read input, if necessary, which will be used in a Scanner.
+ @input = input.respond_to?(:read) ? input.read : input.to_s
+
+ parsing_terminals = false
+ @ast = []
+ parse(@input,
+ :syntax,
+ ISOEBNFMeta::RULES,
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
+ **options
+ ) do |context, *data|
+ rule = case context
+ when :rule
+ # A rule which has already been turned into a `Rule` object.
+ rule = data.first
+ rule.kind = :terminal if parsing_terminals
+ rule
+ end
+ @ast << rule if rule
+ end
+ @ast
+ end
+
+ # Output formatted S-Expression of grammar
+ def to_sxp
+ require 'sxp' unless defined?(SXP)
+ # Output rules as a formatted S-Expression
+ SXP::Generator.string(@ast.map(&:for_sxp))
+ end
+end
diff --git a/lib/ebnf.rb b/lib/ebnf.rb
index 75bb5bc..dce92bd 100755
--- a/lib/ebnf.rb
+++ b/lib/ebnf.rb
@@ -1,7 +1,10 @@
module EBNF
+ autoload :ABNF, "ebnf/abnf"
autoload :Base, "ebnf/base"
autoload :BNF, "ebnf/bnf"
+ autoload :ISOEBNF, "ebnf/isoebnf"
autoload :LL1, "ebnf/ll1"
+ autoload :Native, "ebnf/native"
autoload :Parser, "ebnf/parser"
autoload :PEG, "ebnf/peg"
autoload :Rule, "ebnf/rule"
diff --git a/lib/ebnf/abnf.rb b/lib/ebnf/abnf.rb
new file mode 100644
index 0000000..ac06d8c
--- /dev/null
+++ b/lib/ebnf/abnf.rb
@@ -0,0 +1,301 @@
+require_relative 'abnf/core'
+require_relative 'abnf/meta'
+require 'logger'
+
+# ABNF parser
+# Parses ABNF into an array of {EBNF::Rule}.
+module EBNF
+ class ABNF
+ include EBNF::PEG::Parser
+
+ # Regular expressions for both "Core" and ABNF-specific terminals.
+ ALPHA = %r{[\x41-\x5A\x61-\x7A]}
+ VCHAR = %r{[\x20-\x7E]}
+ WSP = %r{[\x20\x09]}
+ CRLF = %r{\x0D?\x0A}
+ COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
+ C_NL = %r{#{COMMENT}|#{CRLF}}
+ C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}
+
+ ##
+ # Hash of generated {EBNF::Rule} objects by symbol
+ #
+ # @return [Hash{Symbol => EBNF::Rule}]
+ attr_reader :parsed_rules
+
+ ##
+ # The following ABNF grammar rules are treated as terminals.
+
+ # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*`
+ terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
+ value.to_sym
+ end
+
+ # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*`
+ terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}
+
+ # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE`
+ terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
+ value[1..-2]
+ end
+
+ # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?`
+ terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
+ if value.include?('.')
+ # Interpret segments in binary creating a sequence of hex characters or a string
+ hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=2).chr(Encoding::UTF_8)})
+ elsif value.include?('-')
+ # Interpret as a range
+ [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")]
+ else
+ # Interpret as a single HEX character
+ [:hex, "#x%x" % value[1..-1].to_i(base=2)]
+ end
+ end
+
+ # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?`
+ terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
+ if value.include?('.')
+ # Interpret segments in decimal creating a sequence of hex characters or a string
+ hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)})
+ elsif value.include?('-')
+ # Interpret as a range
+ [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")]
+ else
+ # Interpret as a single HEX character
+ [:hex, "#x%x" % value[1..-1].to_i]
+ end
+ end
+
+ # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?`
+ terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
+ if value.include?('.')
+ # Interpret segments in hexadecimal creating a sequence of hex characters or a string
+ hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=16).chr(Encoding::UTF_8)})
+ elsif value.include?('-')
+ # Interpret as a range
+ [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")]
+ else
+ # Interpret as a single HEX character
+ [:hex, "#x#{value[1..-1]}"]
+ end
+ end
+
+ # `c_wsp ::= WSP | (c_nl WSP)`
+ terminal(:c_wsp, C_WSP)
+
+ # `c_nl ::= comment | CRLF`
+ terminal(:c_nl, C_NL)
+
+ # `DIGIT ::= [#x30-#x39]`
+ terminal(:DIGIT, /\d/)
+
+ # ## Non-terminal productions
+
+ # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes.
+ start_production(:rule, as_hash: true)
+
+ # `rule ::= rulename defined_as elements c_nl`
+ production(:rule) do |value|
+ # value contains an expression.
+ # Invoke callback
+ sym = value[:rulename]
+ elements = value[:elements]
+
+ if value[:defined_as] == "=/"
+ # append to rule alternate
+ rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
+ rule.expr = [:alt, rule.expr] unless rule.alt?
+ if elements.is_a?(Array) && elements.first == :alt
+ # append alternatives to rule
+ rule.expr.concat(elements[1..-1])
+ else
+ # add elements as last alternative
+ rule.expr.push(elements)
+ end
+ else
+ # There shouldn't be an existing rule
+ raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
+ parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
+ end
+ progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
+ sym
+ end
+
+ # `elements ::= alternation c_wsp*`
+ production(:elements) do |value|
+ value.first[:alternation]
+ end
+
+ # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*`
+ production(:alternation) do |value|
+ unless value.last[:_alternation_1].empty?
+ [:alt, value.first[:concatenation]] + value.last[:_alternation_1]
+ else
+ value.first[:concatenation]
+ end
+ end
+
+ # The `_aleteration_2` rule comes from the expanded PEG grammar and serves as an opportunity to custommize the values presented to the `aleteration` rule.
+ production(:_alternation_2) do |value|
+ if Array(value.last[:concatenation]).first == :alt
+ value.last[:concatenation][1..-1]
+ else
+ [value.last[:concatenation]]
+ end
+ value.last[:concatenation]
+ end
+
+ # `concatenation::= repetition (c_wsp+ repetition)*`
+ production(:concatenation) do |value|
+ unless value.last[:_concatenation_1].empty?
+ [:seq, value.first[:repetition]] + value.last[:_concatenation_1]
+ else
+ value.first[:repetition]
+ end
+ end
+ start_production(:_concatenation_2, as_hash: true)
+ production(:_concatenation_2) do |value|
+ value[:repetition]
+ end
+
+ # `repetition ::= repeat? element`
+ production(:repetition) do |value|
+ rept = value.first[:_repetition_1]
+ elt = value.last[:element]
+ case rept
+ when [0, '*'] then [:star, elt]
+ when [1, '*'] then [:plus, elt]
+ when nil then elt
+ else
+ [:rept, rept.first, rept.last, elt]
+ end
+ end
+
+ # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)`
+ production(:repeat) do |value|
+ if value.is_a?(Integer)
+ [value, value]
+ else
+ [value.first, value.last]
+ end
+ end
+ start_production(:_repeat_1, as_hash: true)
+ production(:_repeat_1) {|value| value.values}
+ production(:_repeat_2) {|value| value.join("").to_i}
+ production(:_repeat_3) {|value| value.join("").to_i}
+ production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}
+
+ # `element ::= rulename | group | option | char_val | num_val | prose_val`
+ production(:element) do |value|
+ value
+ end
+
+ # `group ::= "(" c_wsp* alternation c_wsp* ")"`
+ start_production(:group, as_hash: true)
+ production(:group) do |value|
+ value[:alternation]
+ end
+
+ # `option ::= "[" c_wsp* alternation c_wsp* "]"`
+ start_production(:option, as_hash: true)
+ production(:option) do |value|
+ [:opt, value[:alternation]]
+ end
+
+ # `case_insensitive_string ::= "%i"? quoted_string`
+ production(:case_insensitive_string) do |value|
+ str = value.last[:quoted_string]
+ if str.match?(/[[:alpha:]]/)
+ # Only need to use case-insensitive if there are alphabetic characters in the string.
+ [:istr, value.last[:quoted_string]]
+ else
+ value.last[:quoted_string]
+ end
+ end
+
+ # `case_sensitive_string ::= "%s" quoted_string`
+ production(:case_sensitive_string) do |value|
+ value.last[:quoted_string]
+ end
+
+ # `num_val ::= "%" (bin_val | dec_val | hex_val)`
+ production(:num_val) do |value|
+ value.last[:_num_val_1]
+ end
+
+ # ## Parser invocation.
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
+ #
+ # @param [#read, #to_s] input
+ # @param [Hash{Symbol => Object}] options
+ # @option options [Boolean] :level
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
+ # @return [EBNFParser]
+ def initialize(input, **options)
+ # If the `level` option is set, instantiate a logger for collecting trace information.
+ if options.has_key?(:level)
+ options[:logger] = Logger.new(STDERR)
+ options[:logger].level = options[:level]
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+ end
+
+ # Read input, if necessary, which will be used in a Scanner.
+ @input = input.respond_to?(:read) ? input.read : input.to_s
+
+ @parsed_rules = {}
+
+ # Parses into `@parsed_rules`
+ parse(@input,
+ :rulelist, # Starting rule
+ ABNFMeta::RULES, # PEG rules
+ whitespace: '', # No implicit whitespace
+ **options)
+ rescue EBNF::PEG::Parser::Error => e
+ raise SyntaxError, e.message
+ end
+
+ ##
+ # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.
+ #
+ # @return [Array]
+ def ast
+ # Add built-in rules (from ABNFCore::RULES) for symbols used by the parsed rules that are not already defined.
+ parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
+ rule = ABNFCore::RULES.detect {|r| r.sym == sym}
+ parsed_rules[sym] ||= rule if rule
+ end
+
+ parsed_rules.values
+ end
+
+ private
+ # Generate a combination of seq and string to represent a sequence of characters
+ #
+ # @param [Array] characters
+ # @return [String,Array]
+ def hex_or_string(characters)
+ seq = [:seq]
+ str_result = ""
+ characters.each do |c|
+ if VCHAR.match?(c)
+ str_result << c
+ else
+ if str_result.length > 0
+ seq << str_result
+ str_result = ""
+ end
+ seq << [:hex, "#x%x" % c.codepoints.first]
+ end
+ end
+ seq << str_result if str_result.length > 0
+
+ # Either return the sequence, or a string
+ if seq.length == 2 && seq.last.is_a?(String)
+ seq.last
+ else
+ seq
+ end
+ end
+ end
+end
\ No newline at end of file
diff --git a/lib/ebnf/abnf/core.rb b/lib/ebnf/abnf/core.rb
new file mode 100644
index 0000000..935343e
--- /dev/null
+++ b/lib/ebnf/abnf/core.rb
@@ -0,0 +1,23 @@
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from etc/abnf-core.ebnf
+module ABNFCore
+ RULES = [
+ EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal),
+ EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal),
+ EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal),
+ EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal),
+ EBNF::Rule.new(:CRLF, nil, [:seq, [:opt, :CR], :LF], kind: :terminal),
+ EBNF::Rule.new(:CTL, nil, [:alt, [:range, "#x00-#x1F"], [:hex, "#x7F"]], kind: :terminal),
+ EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal),
+ EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal),
+ EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, [:range, "A-F"]], kind: :terminal),
+ EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal),
+ EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal),
+ EBNF::Rule.new(:LWSP, nil, [:star, [:alt, :WSP, [:seq, :CRLF, :WSP]]], kind: :terminal),
+ EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal),
+ EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal),
+ EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal),
+ EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal),
+ ]
+end
+
diff --git a/lib/ebnf/abnf/meta.rb b/lib/ebnf/abnf/meta.rb
new file mode 100644
index 0000000..b5ce638
--- /dev/null
+++ b/lib/ebnf/abnf/meta.rb
@@ -0,0 +1,111 @@
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from abnf.ebnf
+module ABNFMeta
+ RULES = [
+ EBNF::Rule.new(:rulelist, nil, [:plus, :_rulelist_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_rulelist_1, nil, [:alt, :rule, :_rulelist_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_rulelist_2, nil, [:seq, :_rulelist_3, :c_nl]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_rulelist_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:rule, nil, [:seq, :rulename, :defined_as, :elements, :c_nl]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:elements, nil, [:seq, :alternation, :_elements_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_elements_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:alternation, nil, [:seq, :concatenation, :_alternation_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alternation_1, nil, [:star, :_alternation_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alternation_2, nil, [:seq, :_alternation_3, "/", :_alternation_4, :concatenation]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alternation_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alternation_4, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:concatenation, nil, [:seq, :repetition, :_concatenation_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_concatenation_1, nil, [:star, :_concatenation_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_concatenation_2, nil, [:seq, :_concatenation_3, :repetition]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_concatenation_3, nil, [:plus, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repetition, nil, [:seq, :_repetition_1, :element]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_repetition_1, nil, [:opt, :repeat]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repeat, nil, [:alt, :_repeat_1, :_repeat_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_repeat_1, nil, [:seq, :_repeat_3, "*", :_repeat_4]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_repeat_3, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_repeat_4, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_repeat_2, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:element, nil, [:alt, :rulename, :group, :option, :char_val, :num_val, :prose_val]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:group, nil, [:seq, "(", :_group_1, :alternation, :_group_2, ")"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_group_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_group_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:option, nil, [:seq, "[", :_option_1, :alternation, :_option_2, "]"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_option_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_option_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:char_val, nil, [:alt, :case_insensitive_string, :case_sensitive_string]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:case_insensitive_string, nil, [:seq, :_case_insensitive_string_1, :quoted_string]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_case_insensitive_string_1, nil, [:opt, "%i"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:case_sensitive_string, nil, [:seq, "%s", :quoted_string]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:num_val, nil, [:seq, "%", :_num_val_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_num_val_1, nil, [:alt, :bin_val, :dec_val, :hex_val]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:rulename, nil, [:seq, :ALPHA, :_rulename_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_rulename_1, nil, [:star, :_rulename_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_rulename_2, nil, [:alt, :ALPHA, :DIGIT, "-"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:defined_as, nil, [:seq, :_defined_as_1, :_defined_as_2, :_defined_as_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_defined_as_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_defined_as_2, nil, [:alt, "=", "=/"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_defined_as_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:c_wsp, nil, [:alt, :WSP, :_c_wsp_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_c_wsp_1, nil, [:seq, :c_nl, :WSP]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:c_nl, nil, [:alt, :COMMENT, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:comment, nil, [:seq, ";", :_comment_1, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_comment_1, nil, [:star, :_comment_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_comment_2, nil, [:alt, :WSP, :VCHAR]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:quoted_string, nil, [:seq, :DQUOTE, :_quoted_string_1, :DQUOTE], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_quoted_string_1, nil, [:star, :_quoted_string_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_quoted_string_2, nil, [:range, "#x20-#x21#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:bin_val, nil, [:seq, "b", :_bin_val_1, :_bin_val_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_1, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_2, nil, [:opt, :_bin_val_3]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_3, nil, [:alt, :_bin_val_4, :_bin_val_5]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_4, nil, [:plus, :_bin_val_6]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_6, nil, [:seq, ".", :_bin_val_7]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_7, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_5, nil, [:seq, "-", :_bin_val_8]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_bin_val_8, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:dec_val, nil, [:seq, "d", :_dec_val_1, :_dec_val_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_1, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_2, nil, [:opt, :_dec_val_3]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_3, nil, [:alt, :_dec_val_4, :_dec_val_5]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_4, nil, [:plus, :_dec_val_6]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_6, nil, [:seq, ".", :_dec_val_7]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_7, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_5, nil, [:seq, "-", :_dec_val_8]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_dec_val_8, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:hex_val, nil, [:seq, "x", :_hex_val_1, :_hex_val_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_1, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_2, nil, [:opt, :_hex_val_3]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_3, nil, [:alt, :_hex_val_4, :_hex_val_5]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_4, nil, [:plus, :_hex_val_6]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_6, nil, [:seq, ".", :_hex_val_7]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_7, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_5, nil, [:seq, "-", :_hex_val_8]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_hex_val_8, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:prose_val, nil, [:seq, "<", :_prose_val_1, ">"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_prose_val_1, nil, [:star, :_prose_val_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_prose_val_2, nil, [:range, "#x20-#x3D#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CRLF, nil, [:seq, :_CRLF_1, :LF], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CRLF_1, nil, [:opt, :CR], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CTL, nil, [:alt, :_CTL_1, :_CTL_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CTL_1, nil, [:range, "#x00-#x1F"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LWSP_1, nil, [:alt, :WSP, :_LWSP_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LWSP_2, nil, [:seq, :CRLF, :WSP], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal).extend(EBNF::PEG::Rule),
+ ]
+end
+
diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb
index d16edd4..90d8f71 100644
--- a/lib/ebnf/base.rb
+++ b/lib/ebnf/base.rb
@@ -66,22 +66,6 @@
# [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html
# [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3
#
-# Open Issues and Future Work
-# ---------------------------
-#
-# The yacker output also has the terminals compiled to elaborate regular
-# expressions. The best strategy for dealing with lexical tokens is not
-# yet clear. Many tokens in SPARQL are case insensitive; this is not yet
-# captured formally.
-#
-# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...)
-# is not yet published; it should be aligned with [swap/grammar/bnf][]
-# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar
-# in the SPARQL and XML specificiations).
-#
-# It would be interesting to corroborate the claim in the SPARQL spec
-# that the grammar is LL(1) with a mechanical proof based on N3 rules.
-#
# [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf
# [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3
#
@@ -100,7 +84,7 @@ module EBNF
class Base
include BNF
include LL1
- include Parser
+ include Native
include PEG
# Abstract syntax tree from parse
@@ -118,23 +102,32 @@ class Base
#
# @param [#read, #to_s] input
# @param [Symbol] format (:ebnf)
- # Format of input, one of :ebnf, or :sxp
+ # Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:native`, or `:sxp`.
+ # Use `:native` for the native EBNF parser, rather than the PEG parser.
# @param [Hash{Symbol => Object}] options
# @option options [Boolean, Array] :debug
# Output debug information to an array or $stdout.
+ # @option options [Boolean, Array] :validate
+ # Validate resulting grammar.
def initialize(input, format: :ebnf, **options)
@options = options.dup
@lineno, @depth, @errors = 1, 0, []
- terminal = false
@ast = []
input = input.respond_to?(:read) ? input.read : input.to_s
case format
- when :sxp
- require 'sxp' unless defined?(SXP)
- @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
+ when :abnf
+ abnf = ABNF.new(input, **options)
+ @ast = abnf.ast
when :ebnf
+ ebnf = Parser.new(input, **options)
+ @ast = ebnf.ast
+ when :isoebnf
+ iso = ISOEBNF.new(input, **options)
+ @ast = iso.ast
+ when :native
+ terminals = false
scanner = StringScanner.new(input)
eachRule(scanner) do |r|
@@ -142,7 +135,9 @@ def initialize(input, format: :ebnf, **options)
case r
when /^@terminals/
# Switch mode to parsing terminals
- terminal = true
+ terminals = true
+ rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self)
+ @ast << rule
when /^@pass\s*(.*)$/m
expr = expression($1).first
rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self)
@@ -151,14 +146,49 @@ def initialize(input, format: :ebnf, **options)
else
rule = depth {ruleParts(r)}
- rule.kind = :terminal if terminal # Override after we've parsed @terminals
+ rule.kind = :terminal if terminals # Override after we've parsed @terminals
rule.orig = r
@ast << rule
end
end
+ when :sxp
+ require 'sxp' unless defined?(SXP)
+ @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)}
else
raise "unknown input format #{format.inspect}"
end
+
+ validate! if @options[:validate]
+ end
+
+ ##
+ # Validate the grammar.
+ #
+ # Makes sure that rules reference either strings or other defined rules.
+ #
+ # @raise [SyntaxError]
+ def validate!
+ ast.each do |rule|
+ begin
+ rule.validate!(@ast)
+ rescue SyntaxError => e
+ error("In rule #{rule.sym}: #{e.message}")
+ end
+ end
+ raise SyntaxError, errors.join("\n") unless errors.empty?
+ end
+
+ ##
+ # Is the grammar valid?
+ #
+ # Uses `#validate!` and catches `SyntaxError`
+ #
+ # @return [Boolean]
+ def valid?
+ validate!
+ true
+ rescue SyntaxError
+ false
end
# Iterate over each rule or terminal, except empty
@@ -174,21 +204,25 @@ def each(kind, &block)
# @return [String]
def to_sxp
require 'sxp' unless defined?(SXP)
- SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
+ SXP::Generator.string(ast.map(&:for_sxp))
end
##
# Output formatted EBNF
+ #
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [String]
- def to_s
- Writer.string(*ast)
+ def to_s(format: :ebnf)
+ Writer.string(*ast, format: format)
end
##
# Output formatted EBNF as HTML
+ #
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [String]
- def to_html
- Writer.html(*ast)
+ def to_html(format: :ebnf)
+ Writer.html(*ast, format: format)
end
##
@@ -210,28 +244,22 @@ def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options)
end
# Either output LL(1) BRANCH tables or rules for PEG parsing
- if ast.first.is_a?(EBNF::PEG::Rule)
- to_ruby_peg(output)
- else
+ if ast.first.first
to_ruby_ll1(output)
+ else
+ to_ruby_peg(output)
end
unless output == $stdout
output.puts "end"
end
end
- def dup
- new_obj = super
- new_obj.instance_variable_set(:@ast, @ast.dup)
- new_obj
- end
-
##
- # Find a rule given a symbol
- # @param [Symbol] sym
- # @return [Rule]
- def find_rule(sym)
- (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
+ # Renumber rule identifiers sequentially ("1", "2", ...) in AST order
+ def renumber!
+ ast.each_with_index do |rule, index|
+ rule.id = (index + 1).to_s
+ end
end
##
@@ -242,6 +270,7 @@ def find_rule(sym)
def to_ttl(prefix = nil, ns = "http://example.org/")
unless ast.empty?
[
+ "@prefix dc: .",
"@prefix rdf: .",
"@prefix rdfs: .",
("@prefix #{prefix}: <#{ns}>." if prefix),
@@ -254,7 +283,21 @@ def to_ttl(prefix = nil, ns = "http://example.org/")
].compact
end.join("\n") +
- ast.sort.map(&:to_ttl).join("\n")
+ ast.map(&:to_ttl).join("\n")
+ end
+
+ def dup
+ new_obj = super
+ new_obj.instance_variable_set(:@ast, @ast.dup)
+ new_obj
+ end
+
+ ##
+ # Find a rule given a symbol
+ # @param [Symbol] sym
+ # @return [Rule]
+ def find_rule(sym)
+ (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym}
end
def depth
diff --git a/lib/ebnf/ebnf/meta.rb b/lib/ebnf/ebnf/meta.rb
new file mode 100644
index 0000000..7492826
--- /dev/null
+++ b/lib/ebnf/ebnf/meta.rb
@@ -0,0 +1,90 @@
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from etc/ebnf.ebnf
+module EBNFMeta
+ RULES = [
+ EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:HEX, "13", [:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule),
+ ]
+end
+
diff --git a/lib/ebnf/isoebnf.rb b/lib/ebnf/isoebnf.rb
new file mode 100644
index 0000000..8ad644a
--- /dev/null
+++ b/lib/ebnf/isoebnf.rb
@@ -0,0 +1,229 @@
+require_relative 'isoebnf/meta'
+require 'logger'
+
+# ISO EBNF parser
+# Parses ISO EBNF into an array of {EBNF::Rule}.
+module EBNF
+ class ISOEBNF
+ include EBNF::PEG::Parser
+
+ # The base for terminal-character, which omits "'", '"', and '?'.
+ # Could be more optimized, and one might quibble
+ # with the overly-strictly defined character set,
+ # but it is correct.
+ TERMINAL_CHARACTER_BASE = %r{
+ [a-zA-Z0-9] | # letter | decimal digit
+ , | # concatenate symbol
+ = | # defining symbol
+ [\|\/!] | # definition separator symbol
+ \*\) | # end comment symbol
+ \) | # end group symbol
+ \] | # end option symbol
+ \} | # end repeat symbol
+ \- | # except symbol
+ #\' | # first quote symbol
+ \* | # repetition symbol
+ #\" | # second quote symbol
+ #\? | # special sequence symbol
+ \(\* | # start comment symbol
+ \( | # start group symbol
+ \[ | # start option symbol
+ \{ | # start repeat symbol
+ [;\.] | # terminator symbol
+ [:+_%@&$<>^\x20\x23\\`~] # other character
+ }x
+
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
+
+ # Abstract syntax tree from parse
+ #
+ # @return [Array]
+ attr_reader :ast
+
+ # `[14] integer ::= decimal_digit+`
+ terminal(:integer, /\d+/) do |value, prod|
+ value.to_i
+ end
+
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
+ value.to_sym
+ end
+
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
+ # ` | ('"' second_terminal_character+ '"')`
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
+ value[1..-2]
+ end
+
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'`
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
+
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
+ # ` | [,=;*}#x2d?([{;]`
+ # ` | '*)'`
+ # ` | '(*'`
+ # ` | ']'`
+ # ` | other_character`
+ terminal(:terminal_character, TERMINAL_CHARACTER)
+
+ # `[25] empty ::= ''`
+ terminal(:empty, //)
+
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
+ terminal(:definition_separator_symbol, /[\|\/!]/)
+
+ # `[27] terminator_symbol ::= ';' | '.'`
+ terminal(:terminator_symbol, /[;\.]/)
+
+ # `[28] start_option_symbol ::= '['`
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
+
+ # `[29] end_option_symbol ::= ']'`
+ terminal(:end_option_symbol, /\]/)
+
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
+ terminal(:start_repeat_symbol, /{|\(:/)
+
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
+ terminal(:end_repeat_symbol, /}|:\)/)
+
+ # ## Non-terminal productions
+
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
+ # value contains an expression.
+ # Invoke callback
+ sym = value[0][:meta_identifier]
+ definitions_list = value[2][:definitions_list]
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
+ nil
+ end
+
+ # Setting `as_hash: true` in the start production makes the value a hash, rather than an array of hashes.
+ #
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
+ start_production(:definitions_list, as_hash: true)
+ production(:definitions_list) do |value|
+ if value[:_definitions_list_1].length > 0
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
+ else
+ value[:single_definition]
+ end
+ end
+ production(:_definitions_list_1) do |value|
+ Array(value.first)
+ end
+ start_production(:_definitions_list_2, as_hash: true)
+ production(:_definitions_list_2) do |value|
+ if Array(value[:definitions_list]).first == :alt
+ value[:definitions_list][1..-1]
+ else
+ [value[:definitions_list]]
+ end
+ end
+
+ # `[4] single_definition ::= term (',' term)*`
+ start_production(:single_definition, as_hash: true)
+ production(:single_definition) do |value|
+ if value[:_single_definition_1].length > 0
+ [:seq, value[:term]] + value[:_single_definition_1]
+ else
+ value[:term]
+ end
+ end
+ production(:_single_definition_1) do |value|
+ value.map {|a1| a1.last[:term]}.compact # Get rid of ','
+ end
+
+ # `[5] term ::= factor ('-' exception)?`
+ start_production(:term, as_hash: true)
+ production(:term) do |value|
+ if value[:_term_1]
+ [:diff, value[:factor], value[:_term_1]]
+ else
+ value[:factor]
+ end
+ end
+ production(:_term_1) do |value|
+ value.last[:exception] if value
+ end
+
+ # `[6] exception ::= factor`
+ start_production(:exception, as_hash: true)
+ production(:exception) do |value|
+ value[:factor]
+ end
+
+ # `[7] factor ::= (integer '*')? primary`
+ start_production(:factor, as_hash: true)
+ production(:factor) do |value|
+ if value[:_factor_1]
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
+ else
+ value[:primary]
+ end
+ end
+ production(:_factor_2) do |value|
+ value.first[:integer]
+ end
+
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
+ production(:optional_sequence) do |value|
+ [:opt, value[1][:definitions_list]]
+ end
+
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
+ production(:repeated_sequence) do |value|
+ [:star, value[1][:definitions_list]]
+ end
+
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
+ production(:grouped_sequence) do |value|
+ [:seq, value[1][:definitions_list]]
+ end
+
+ # ## Parser invocation.
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
+ #
+ # @param [#read, #to_s] input
+ # @param [Hash{Symbol => Object}] options
+ # @option options [Integer] :level
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
+ # @return [EBNF::ISOEBNF]
+ def initialize(input, **options, &block)
+ # If the `level` option is set, instantiate a logger for collecting trace information.
+ if options.has_key?(:level)
+ options[:logger] = Logger.new(STDERR)
+ options[:logger].level = options[:level]
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+ end
+
+ # Read input, if necessary, which will be used in a Scanner.
+ @input = input.respond_to?(:read) ? input.read : input.to_s
+
+ parsing_terminals = false
+ @ast = []
+ parse(@input,
+ :syntax,
+ ISOEBNFMeta::RULES,
+ whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
+ **options
+ ) do |context, *data|
+ rule = case context
+ when :rule
+ # A rule which has already been turned into a `Rule` object.
+ rule = data.first
+ rule.kind = :terminal if parsing_terminals
+ rule
+ end
+ @ast << rule if rule
+ end
+ rescue EBNF::PEG::Parser::Error => e
+ raise SyntaxError, e.message
+ end
+ end
+end
diff --git a/lib/ebnf/isoebnf/meta.rb b/lib/ebnf/isoebnf/meta.rb
new file mode 100644
index 0000000..753398f
--- /dev/null
+++ b/lib/ebnf/isoebnf/meta.rb
@@ -0,0 +1,75 @@
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from etc/iso-ebnf.ebnf
+module ISOEBNFMeta
+ RULES = [
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
+ ]
+end
+
diff --git a/lib/ebnf/ll1.rb b/lib/ebnf/ll1.rb
index 0265f1e..34214e7 100644
--- a/lib/ebnf/ll1.rb
+++ b/lib/ebnf/ll1.rb
@@ -9,9 +9,7 @@ module EBNF
# BRANCH = {
# :alt => {
# "(" => [:seq, :_alt_1],
- # :ENUM => [:seq, :_alt_1],
# :HEX => [:seq, :_alt_1],
- # :O_ENUM => [:seq, :_alt_1],
# :O_RANGE => [:seq, :_alt_1],
# :RANGE => [:seq, :_alt_1],
# :STRING1 => [:seq, :_alt_1],
@@ -38,8 +36,6 @@ module EBNF
# :alt => [
# :HEX,
# :SYMBOL,
- # :ENUM,
- # :O_ENUM,
# :RANGE,
# :O_RANGE,
# :STRING1,
@@ -54,7 +50,7 @@ module EBNF
#
# TERMINALS = ["(", ")", "-",
# "@pass", "@terminals",
- # :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX,
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
# ].freeze
#
@@ -214,8 +210,9 @@ def first_follow(*starts)
firsts, follows = 0, 0
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
#
- # For sequences, this is the first rule in the sequence.
- # For alts, this is every rule in the sequence
+ # * For sequences, this is the first rule in the sequence.
+ # * For alts, this is every rule in the alternation
+ # * Other rules don't matter, as they don't appear in strict BNF
each(:rule) do |ai|
# Fi(a w' ) = { a } for every terminal a
ai.terminals(ast).each do |t|
diff --git a/lib/ebnf/ll1/parser.rb b/lib/ebnf/ll1/parser.rb
index 2970881..e8f5159 100644
--- a/lib/ebnf/ll1/parser.rb
+++ b/lib/ebnf/ll1/parser.rb
@@ -576,15 +576,23 @@ def progress(node, *args, &block)
# @option options [Integer] :depth
# Recursion depth for indenting output
# @yieldreturn [String] additional string appended to `message`.
- def debug(*args)
+ def debug(*args, &block)
return unless @options[:logger]
options = args.last.is_a?(Hash) ? args.pop : {}
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
level = options.fetch(:level, 0)
-
depth = options[:depth] || self.depth
- args << yield if block_given?
- @options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" "))
+
+ if self.respond_to?(:log_debug)
+ level = [:debug, :info, :warn, :error, :fatal][level]
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
+ elsif @options[:logger].respond_to?(:add)
+ args << yield if block_given?
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
+ elsif @options[:logger].respond_to?(:<<)
+ args << yield if block_given?
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
+ end
end
private
diff --git a/lib/ebnf/native.rb b/lib/ebnf/native.rb
new file mode 100644
index 0000000..664d01c
--- /dev/null
+++ b/lib/ebnf/native.rb
@@ -0,0 +1,320 @@
+module EBNF
+ module Native
+ ##
+ # Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself.
+ #
+ # Iterate over rule strings.
+ # a line that starts with '\[' or '@' starts a new rule
+ #
+ # @param [StringScanner] scanner
+ # @yield rule_string
+ # @yieldparam [String] rule_string
+ def eachRule(scanner)
+ cur_lineno = 1
+ r = ''
+ until scanner.eos?
+ case
+ when s = scanner.scan(%r(\s+)m)
+ # Eat whitespace
+ cur_lineno += s.count("\n")
+ #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
+ when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
+ # Eat comments /* .. */
+ cur_lineno += s.count("\n")
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
+ when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
+ # Eat comments (* .. *)
+ cur_lineno += s.count("\n")
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
+ when s = scanner.scan(%r((#(?!x)|//).*$))
+ # Eat comments // & #
+ cur_lineno += s.count("\n")
+ debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
+ when s = scanner.scan(/\A["']/)
+ # Found a quote, scan until end of matching quote
+ s += scanner.scan_until(/#{scanner.matched}|$/)
+ r += s
+ when s = scanner.scan(%r(^@terminals))
+ #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
+ yield(r) unless r.empty?
+ @lineno = cur_lineno
+ yield(s)
+ r = ''
+ when s = scanner.scan(/@pass/)
+ # Found rule start, if we've already collected a rule, yield it
+ #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
+ yield r unless r.empty?
+ @lineno = cur_lineno
+ r = s
+ when s = scanner.scan(EBNF::Terminals::LHS)
+ # Found rule start, if we've already collected a rule, yield it
+ yield r unless r.empty?
+ #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
+ @lineno = cur_lineno
+ r = s
+ else
+ # Collect until end of line, or start of comment or quote
+ s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
+ if scanner.matched.length > 0
+ # Back up scan head before ending match
+ scanner.pos = scanner.pos - scanner.matched.length
+
+ # Remove matched from end of string
+ s = s[0..-(scanner.matched.length+1)]
+ end
+ cur_lineno += s.count("\n")
+ #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
+ r += s
+ end
+ end
+ yield r unless r.empty?
+ end
+
+ ##
+ # Parse a rule into an optional rule number, a symbol and an expression
+ #
+ # @param [String] rule
+ # @return [Rule]
+ def ruleParts(rule)
+ num_sym, expr = rule.split('::=', 2).map(&:strip)
+ num, sym = num_sym.split(']', 2).map(&:strip)
+ num, sym = "", num if sym.nil?
+ num = num[1..-1]
+ r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
+ debug("ruleParts") { r.inspect }
+ r
+ end
+
+ ##
+ # Parse a string into an expression tree and a remaining string
+ #
+ # @example
+ # >>> expression("a b c")
+ # ((seq a b c) '')
+ #
+ # >>> expression("a? b+ c*")
+ # ((seq (opt a) (plus b) (star c)) '')
+ #
+ # >>> expression(" | x xlist")
+ # ((alt (seq) (seq x xlist)) '')
+ #
+ # >>> expression("a | (b - c)")
+ # ((alt a (diff b c)) '')
+ #
+ # >>> expression("a b | c d")
+ # ((alt (seq a b) (seq c d)) '')
+ #
+ # >>> expression("a | b | c")
+ # ((alt a b c) '')
+ #
+ # >>> expression("a) b c")
+ # (a ' b c')
+ #
+ # >>> expression("BaseDecl? PrefixDecl*")
+ # ((seq (opt BaseDecl) (star PrefixDecl)) '')
+ #
+ # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
+ # ((alt NCCHAR1 diff
+ # (range '0-9')
+ # (hex '#x00B7')
+ # (range '#x0300-#x036F')
+ # (range '#x203F-#x2040')) '')
+ #
+ # @param [String] s
+ # @return [Array]
+ def expression(s)
+ debug("expression") {"(#{s.inspect})"}
+ e, s = depth {alt(s)}
+ debug {"=> alt returned #{[e, s].inspect}"}
+ unless s.to_s.empty?
+ t, ss = depth {terminal(s)}
+ debug {"=> terminal returned #{[t, ss].inspect}"}
+ return [e, ss] if t.is_a?(Array) && t.first == :")"
+ end
+ [e, s]
+ end
+
+ ##
+ # Parse alt
+ # >>> alt("a | b | c")
+ # ((alt a b c) '')
+ # @param [String] s
+ # @return [Array]
+ def alt(s)
+ debug("alt") {"(#{s.inspect})"}
+ args = []
+ while !s.to_s.empty?
+ e, s = depth {seq(s)}
+ debug {"=> seq returned #{[e, s].inspect}"}
+ if e.to_s.empty?
+ break unless args.empty?
+ e = [:seq, []] # empty sequence
+ end
+ args << e
+ unless s.to_s.empty?
+ t, ss = depth {terminal(s)}
+ break unless t[0] == :alt
+ s = ss
+ end
+ end
+ args.length > 1 ? [args.unshift(:alt), s] : [e, s]
+ end
+
+ ##
+ # parse seq
+ #
+ # >>> seq("a b c")
+ # ((seq a b c) '')
+ #
+ # >>> seq("a b? c")
+ # ((seq a (opt b) c) '')
+ def seq(s)
+ debug("seq") {"(#{s.inspect})"}
+ args = []
+ while !s.to_s.empty?
+ e, ss = depth {diff(s)}
+ debug {"=> diff returned #{[e, ss].inspect}"}
+ unless e.to_s.empty?
+ args << e
+ s = ss
+ else
+ break;
+ end
+ end
+ if args.length > 1
+ [args.unshift(:seq), s]
+ elsif args.length == 1
+ args + [s]
+ else
+ ["", s]
+ end
+ end
+
+ ##
+ # parse diff
+ #
+ # >>> diff("a - b")
+ # ((diff a b) '')
+ def diff(s)
+ debug("diff") {"(#{s.inspect})"}
+ e1, s = depth {postfix(s)}
+ debug {"=> postfix returned #{[e1, s].inspect}"}
+ unless e1.to_s.empty?
+ unless s.to_s.empty?
+ t, ss = depth {terminal(s)}
+ debug {"diff #{[t, ss].inspect}"}
+ if t.is_a?(Array) && t.first == :diff
+ s = ss
+ e2, s = primary(s)
+ unless e2.to_s.empty?
+ return [[:diff, e1, e2], s]
+ else
+ error("diff", "Syntax Error")
+ raise SyntaxError, "diff missing second operand"
+ end
+ end
+ end
+ end
+ [e1, s]
+ end
+
+ ##
+ # parse postfix
+ #
+ # >>> postfix("a b c")
+ # (a ' b c')
+ #
+ # >>> postfix("a? b c")
+ # ((opt a) ' b c')
+ def postfix(s)
+ debug("postfix") {"(#{s.inspect})"}
+ e, s = depth {primary(s)}
+ debug {"=> primary returned #{[e, s].inspect}"}
+ return ["", s] if e.to_s.empty?
+ if !s.to_s.empty?
+ t, ss = depth {terminal(s)}
+ debug {"=> #{[t, ss].inspect}"}
+ if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
+ return [[t.first, e], ss]
+ end
+ end
+ [e, s]
+ end
+
+ ##
+ # parse primary
+ #
+ # >>> primary("a b c")
+ # (a ' b c')
+ def primary(s)
+ debug("primary") {"(#{s.inspect})"}
+ t, s = depth {terminal(s)}
+ debug {"=> terminal returned #{[t, s].inspect}"}
+ if t.is_a?(Symbol) || t.is_a?(String)
+ [t, s]
+ elsif %w(range hex).map(&:to_sym).include?(t.first)
+ [t, s]
+ elsif t.first == :"("
+ e, s = depth {expression(s)}
+ debug {"=> expression returned #{[e, s].inspect}"}
+ [e, s]
+ else
+ ["", s]
+ end
+ end
+
+ ##
+ # parse one terminal; return the terminal and the remaining string
+ #
+ # A terminal is represented as a tuple whose 1st item gives the type;
+ # some types have additional info in the tuple.
+ #
+ # @example
+ # >>> terminal("'abc' def")
+ # ('abc' ' def')
+ #
+ # >>> terminal("[0-9]")
+ # ((range '0-9') '')
+ # >>> terminal("#x00B7")
+ # ((hex '#x00B7') '')
+ # >>> terminal ("\[#x0300-#x036F\]")
+ # ((range '#x0300-#x036F') '')
+ # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
+ # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
+ def terminal(s)
+ s = s.strip
+ #STDERR.puts s.inspect
+ case m = s[0,1]
+ when '"', "'" # STRING1 or STRING2
+ l, s = s[1..-1].split(m.rstrip, 2)
+ [LL1::Lexer.unescape_string(l), s]
+ when '[' # RANGE, O_RANGE
+ l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
+ [[:range, LL1::Lexer.unescape_string(l)], s]
+ when '#' # HEX
+ s.match(/(#x\h+)(.*)$/)
+ l, s = $1, $2
+ [[:hex, l], s]
+ when /[\w\.]/ # SYMBOL
+ s.match(/([\w\.]+)(.*)$/)
+ l, s = $1, $2
+ [l.to_sym, s]
+ when '-'
+ [[:diff], s[1..-1]]
+ when '?'
+ [[:opt], s[1..-1]]
+ when '|'
+ [[:alt], s[1..-1]]
+ when '+'
+ [[:plus], s[1..-1]]
+ when '*'
+ [[:star], s[1..-1]]
+ when /[\(\)]/ # '(' or ')'
+ [[m.to_sym], s[1..-1]]
+ else
+ error("terminal", "unrecognized terminal: #{s.inspect}")
+ raise SyntaxError, "unrecognized terminal: #{s.inspect}"
+ end
+ end
+ end
+end
\ No newline at end of file
diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb
index f88b94e..c475a10 100644
--- a/lib/ebnf/parser.rb
+++ b/lib/ebnf/parser.rb
@@ -1,322 +1,305 @@
+require_relative 'ebnf/meta'
+require 'logger'
+
module EBNF
- module Parser
- ##
- # Iterate over rule strings.
- # a line that starts with '\[' or '@' starts a new rule
- #
- # @param [StringScanner] scanner
- # @yield rule_string
- # @yieldparam [String] rule_string
- def eachRule(scanner)
- cur_lineno = 1
- r = ''
- until scanner.eos?
- case
- when s = scanner.scan(%r(\s+)m)
- # Eat whitespace
- cur_lineno += s.count("\n")
- #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
- when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
- # Eat comments /* .. */
- cur_lineno += s.count("\n")
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
- when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
- # Eat comments (* .. *)
- cur_lineno += s.count("\n")
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
- when s = scanner.scan(%r((#(?!x)|//).*$))
- # Eat comments // & #
- cur_lineno += s.count("\n")
- debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
- when s = scanner.scan(/\A["']/)
- # Found a quote, scan until end of matching quote
- s += scanner.scan_until(/#{scanner.matched}|$/)
- r += s
- when s = scanner.scan(%r(^@terminals))
- #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
- yield(r) unless r.empty?
- @lineno = cur_lineno
- yield(s)
- r = ''
- when s = scanner.scan(/@pass/)
- # Found rule start, if we've already collected a rule, yield it
- #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
- yield r unless r.empty?
- @lineno = cur_lineno
- r = s
- when s = scanner.scan(/(?:\[[\w\.]+\])\s*[\w\.]+\s*::=/)
- # Found rule start, if we've already collected a rule, yield it
- yield r unless r.empty?
- #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
- @lineno = cur_lineno
- r = s
- else
- # Collect until end of line, or start of comment or quote
- s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
- if scanner.matched.length > 0
- # Back up scan head before ending match
- scanner.pos = scanner.pos - scanner.matched.length
+ class Parser
+ include EBNF::PEG::Parser
+ include EBNF::Terminals
- # Remove matched from end of string
- s = s[0..-(scanner.matched.length+1)]
- end
- cur_lineno += s.count("\n")
- #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
- r += s
- end
- end
- yield r unless r.empty?
+ # Abstract syntax tree from parse
+ #
+ # @return [Array]
+ attr_reader :ast
+
+ # ## Terminals
+ # Define rules for Terminals, placing results on the input stack, making them available to upstream non-Terminal rules.
+ #
+ # Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream.
+ #
+ # The result of the terminal block is the semantic value of that terminal, which is often a string, but may be any instance which reflects the semantic interpretation of that terminal.
+ #
+ # The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise.
+ #
+ # The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example.
+ #
+ # If no block is provided, then the value which would have been passed to the block is used as the result directly.
+
+ # Match the Left hand side of a rule or terminal
+ #
+ # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::='
+ terminal(:LHS, LHS) do |value, prod|
+ value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first
+ end
+
+ # Match `SYMBOL` terminal
+ #
+ # [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+
+ terminal(:SYMBOL, SYMBOL) do |value|
+ value.to_sym
end
-
- ##
- # Parse a rule into an optional rule number, a symbol and an expression
- #
- # @param [String] rule
- # @return [Rule]
- def ruleParts(rule)
- num_sym, expr = rule.split('::=', 2).map(&:strip)
- num, sym = num_sym.split(']', 2).map(&:strip)
- num, sym = "", num if sym.nil?
- num = num[1..-1]
- r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
- debug("ruleParts") { r.inspect }
- r
+
+ # Match `HEX` terminal
+ #
+ # [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+
+ terminal(:HEX, HEX) do |value|
+ [:hex, value]
end
- ##
- # Parse a string into an expression tree and a remaining string
- #
- # @example
- # >>> expression("a b c")
- # ((seq a b c) '')
- #
- # >>> expression("a? b+ c*")
- # ((seq (opt a) (plus b) (star c)) '')
- #
- # >>> expression(" | x xlist")
- # ((alt (seq) (seq x xlist)) '')
- #
- # >>> expression("a | (b - c)")
- # ((alt a (diff b c)) '')
- #
- # >>> expression("a b | c d")
- # ((alt (seq a b) (seq c d)) '')
- #
- # >>> expression("a | b | c")
- # ((alt a b c) '')
- #
- # >>> expression("a) b c")
- # (a ' b c')
- #
- # >>> expression("BaseDecl? PrefixDecl*")
- # ((seq (opt BaseDecl) (star PrefixDecl)) '')
- #
- # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
- # ((alt NCCHAR1 diff
- # (range '0-9')
- # (hex '#x00B7')
- # (range '#x0300-#x036F')
- # (range, '#x203F-#x2040')) '')
- #
- # @param [String] s
- # @return [Array]
- def expression(s)
- debug("expression") {"(#{s.inspect})"}
- e, s = depth {alt(s)}
- debug {"=> alt returned #{[e, s].inspect}"}
- unless s.to_s.empty?
- t, ss = depth {terminal(s)}
- debug {"=> terminal returned #{[t, ss].inspect}"}
- return [e, ss] if t.is_a?(Array) && t.first == :")"
- end
- [e, s]
+ # Terminal for `RANGE` is matched as part of a `primary` rule.
+ #
+ # [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' - LHS
+ terminal(:RANGE, RANGE) do |value|
+ [:range, value[1..-2]]
end
-
- ##
- # Parse alt
- # >>> alt("a | b | c")
- # ((alt a b c) '')
- # @param [String] s
- # @return [Array]
- def alt(s)
- debug("alt") {"(#{s.inspect})"}
- args = []
- while !s.to_s.empty?
- e, s = depth {seq(s)}
- debug {"=> seq returned #{[e, s].inspect}"}
- if e.to_s.empty?
- break unless args.empty?
- e = [:seq, []] # empty sequence
- end
- args << e
- unless s.to_s.empty?
- t, ss = depth {terminal(s)}
- break unless t[0] == :alt
- s = ss
- end
- end
- args.length > 1 ? [args.unshift(:alt), s] : [e, s]
+
+ # Terminal for `O_RANGE` is matched as part of a `primary` rule.
+ #
+ # [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']'
+ terminal(:O_RANGE, O_RANGE) do |value|
+ [:range, value[1..-2]]
end
-
- ##
- # parse seq
- #
- # >>> seq("a b c")
- # ((seq a b c) '')
- #
- # >>> seq("a b? c")
- # ((seq a (opt b) c) '')
- def seq(s)
- debug("seq") {"(#{s.inspect})"}
- args = []
- while !s.to_s.empty?
- e, ss = depth {diff(s)}
- debug {"=> diff returned #{[e, ss].inspect}"}
- unless e.to_s.empty?
- args << e
- s = ss
- else
- break;
- end
- end
- if args.length > 1
- [args.unshift(:seq), s]
- elsif args.length == 1
- args + [s]
+
+ # Match double quote string
+ #
+ # [16] STRING1 ::= '"' (CHAR - '"')* '"'
+ terminal(:STRING1, STRING1) do |value|
+ value[1..-2]
+ end
+
+ # Match single quote string
+ #
+ # [17] STRING2 ::= "'" (CHAR - "'")* "'"
+ terminal(:STRING2, STRING2) do |value|
+ value[1..-2]
+ end
+
+ # The `CHAR` and `R_CHAR` productions are not used explicitly
+
+ # Match `POSTFIX` terminal
+ #
+ # [20] POSTFIX ::= [?*+]
+ terminal(:POSTFIX, POSTFIX)
+
+ # The `PASS` production is not used explicitly
+
+ # ## Non-terminal productions
+ # Define productions for non-Terminals. This can include `start_production` as well as `production` to hook into rule start and end. In some cases, we need to use sub-productions as generated when turning EBNF into PEG.
+ #
+ # Productions are defined with a symbol matching the associated rule name.
+ #
+ # The result of the productions is typically the abstract syntax tree matched by the rule, so far, but could be a specific semantic value, or could be ignored with the result being returned via the `callback`.
+ #
+ # The `value` parameter is the result returned from child productions
+ #
+ # The `data` parameter is other data which may be returned by child productions placing information onto their input (unused in this example).
+ #
+ # The `callback` parameter provides access to a callback defined in the call to `parse`.
+
+ # Production for end of `declaration` non-terminal.
+ #
+ # Look for `@terminals` to change parser state to parsing terminals.
+ #
+ # Clears the packrat parser when called.
+ #
+ # `@pass` is ignored here.
+ #
+ # [2] declaration ::= '@terminals' | pass
+ production(:declaration, clear_packrat: true) do |value, data, callback|
+ # value contains a declaration.
+ # Invoke callback
+ callback.call(:terminals) if value == '@terminals'
+ nil
+ end
+
+ # Production for end of `rule` non-terminal.
+ #
+ # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`.
+ #
+ # Clears the packrat parser when called.
+ #
+ # Create rule from expression value and pass to callback
+ #
+ # [3] rule ::= LHS expression
+ start_production(:rule, as_hash: true)
+ production(:rule, clear_packrat: true) do |value, data, callback|
+ # value contains an expression.
+ # Invoke callback
+ id, sym = value[:LHS]
+ expression = value[:expression]
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
+ nil
+ end
+
+ # Production for end of `expression` non-terminal.
+ # Passes through the optimized value of the alt production as follows:
+ #
+ # The `value` parameter, is of the form `[{alt: "v"}]`.
+ #
+ # [:alt foo] => foo
+ # [:alt foo bar] => [:alt foo bar]
+ #
+ # [4] expression ::= alt
+ production(:expression) do |value|
+ value.first[:alt]
+ end
+
+ # Production for end of `alt` non-terminal.
+ # Passes through the optimized value of the seq production as follows:
+ #
+ # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`.
+ #
+ # [:seq foo] => foo
+ # [:seq foo bar] => [:seq foo bar]
+ #
+ # Note that this also may just pass through from `_alt_1`
+ #
+ # [5] alt ::= seq ('|' seq)*
+ start_production(:alt, as_hash: true)
+ production(:alt) do |value|
+ if value[:_alt_1].length > 0
+ [:alt, value[:seq]] + value[:_alt_1]
else
- ["", s]
+ value[:seq]
end
end
-
- ##
- # parse diff
- #
- # >>> diff("a - b")
- # ((diff a b) '')
- def diff(s)
- debug("diff") {"(#{s.inspect})"}
- e1, s = depth {postfix(s)}
- debug {"=> postfix returned #{[e1, s].inspect}"}
- unless e1.to_s.empty?
- unless s.to_s.empty?
- t, ss = depth {terminal(s)}
- debug {"diff #{[t, ss].inspect}"}
- if t.is_a?(Array) && t.first == :diff
- s = ss
- e2, s = primary(s)
- unless e2.to_s.empty?
- return [[:diff, e1, e2], s]
- else
- error("diff", "Syntax Error")
- raise "Syntax Error"
- end
- end
- end
- end
- [e1, s]
+
+ # Production for end of `_alt_1` non-terminal.
+ # Used to collect the `('|' seq)*` portion of the `alt` non-terminal:
+ #
+ # The `value` parameter, is of the form `[{seq: ["v"]}]`.
+ #
+ # [5] _alt_1 ::= ('|' seq)*
+ production(:_alt_1) do |value|
+ value.map {|a1| a1.last[:seq]}.compact # Get rid of '|'
end
-
- ##
- # parse postfix
- #
- # >>> postfix("a b c")
- # (a ' b c')
- #
- # >>> postfix("a? b c")
- # ((opt, a) ' b c')
- def postfix(s)
- debug("postfix") {"(#{s.inspect})"}
- e, s = depth {primary(s)}
- debug {"=> primary returned #{[e, s].inspect}"}
- return ["", s] if e.to_s.empty?
- if !s.to_s.empty?
- t, ss = depth {terminal(s)}
- debug {"=> #{[t, ss].inspect}"}
- if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
- return [[t.first, e], ss]
- end
- end
- [e, s]
+
+ # Production for end of `seq` non-terminal.
+ # Passes through the optimized value of the `diff` production as follows:
+ #
+ # The `value` parameter, is an array of values, which cannot be empty.
+ #
+ # [:diff foo] => foo
+ # [:diff foo bar] => [:diff foo bar]
+ #
+ # Note that this also may just pass through from `_seq_1`
+ #
+ # [6] seq ::= diff+
+ production(:seq) do |value|
+ value.length == 1 ? value.first : ([:seq] + value)
end
- ##
- # parse primary
- #
- # >>> primary("a b c")
- # (a ' b c')
- def primary(s)
- debug("primary") {"(#{s.inspect})"}
- t, s = depth {terminal(s)}
- debug {"=> terminal returned #{[t, s].inspect}"}
- if t.is_a?(Symbol) || t.is_a?(String)
- [t, s]
- elsif %w(range hex).map(&:to_sym).include?(t.first)
- [t, s]
- elsif t.first == :"("
- e, s = depth {expression(s)}
- debug {"=> expression returned #{[e, s].inspect}"}
- [e, s]
+ # `Diff` production returns concatenated postfix values
+ #
+ # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`.
+ #
+ # [7] diff ::= postfix ('-' postfix)?
+ start_production(:diff, as_hash: true)
+ production(:diff) do |value|
+ if value[:_diff_1]
+ [:diff, value[:postfix], value[:_diff_1]]
else
- ["", s]
+ value[:postfix]
end
end
-
- ##
- # parse one terminal; return the terminal and the remaining string
- #
- # A terminal is represented as a tuple whose 1st item gives the type;
- # some types have additional info in the tuple.
- #
- # @example
- # >>> terminal("'abc' def")
- # ('abc' ' def')
- #
- # >>> terminal("[0-9]")
- # ((range '0-9') '')
- # >>> terminal("#x00B7")
- # ((hex '#x00B7') '')
- # >>> terminal ("\[#x0300-#x036F\]")
- # ((range '#x0300-#x036F') '')
- # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
- # ((range "^<>'{}|^`") '-\[#x00-#x20\]')
- def terminal(s)
- s = s.strip
- #STDERR.puts s.inspect
- case m = s[0,1]
- when '"', "'" # STRING1 or STRING2
- l, s = s[1..-1].split(m.rstrip, 2)
- [LL1::Lexer.unescape_string(l), s]
- when '[' # RANGE, O_RANGE
- l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
- [[:range, LL1::Lexer.unescape_string(l)], s]
- when '#' # HEX
- s.match(/(#x\h+)(.*)$/)
- l, s = $1, $2
- [[:hex, l], s]
- when /[\w\.]/ # SYMBOL
- s.match(/([\w\.]+)(.*)$/)
- l, s = $1, $2
- [l.to_sym, s]
- when '@' # @pass or @terminals
- s.match(/@(#\w+)(.*)$/)
- l, s = $1, $2
- [[:"@", l], s]
- when '-'
- [[:diff], s[1..-1]]
- when '?'
- [[:opt], s[1..-1]]
- when '|'
- [[:alt], s[1..-1]]
- when '+'
- [[:plus], s[1..-1]]
- when '*'
- [[:star], s[1..-1]]
- when /[\(\)]/ # '(' or ')'
- [[m.to_sym], s[1..-1]]
- else
- error("terminal", "unrecognized terminal: #{s.inspect}")
- raise "Syntax Error, unrecognized terminal: #{s.inspect}"
+
+ production(:_diff_1) do |value|
+ value.last[:postfix] if value
+ end
+
+ # Production for end of `postfix` non-terminal.
+ # Either returns the `primary` production value, or as modified by the `postfix`.
+ #
+ # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`.
+ #
+ # [:primary] => [:primary]
+ # [:primary, '*'] => [:star, :primary]
+ # [:primary, '+'] => [:plus, :primary]
+ # [:primary, '?'] => [:opt, :primary]
+ #
+ # [8] postfix ::= primary POSTFIX?
+ start_production(:postfix, as_hash: true)
+ production(:postfix) do |value|
+ # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
+ case value[:_postfix_1]
+ when "*" then [:star, value[:primary]]
+ when "+" then [:plus, value[:primary]]
+ when "?" then [:opt, value[:primary]]
+ else value[:primary]
+ end
+ end
+
+ # Production for end of `primary` non-terminal.
+ # Places `:primary` on the stack
+ #
+ # The `value` parameter is either a terminal value or an array of the form `[{'(': '('}, {expression: "v"}, {')': ')'}]`.
+ #
+ # This may either be a terminal, or the result of an `expression`.
+ #
+ # [9] primary ::= HEX
+ # | SYMBOL
+ # | RANGE
+ # | O_RANGE
+ # | STRING1
+ # | STRING2
+ # | '(' expression ')'
+ production(:primary) do |value|
+ Array(value).length > 2 ? value[1][:expression] : value
+ end
+
+ # Production for end of pass non-terminal.
+ #
+ # [10] pass ::= '@pass' expression
+ production(:pass) do |value, data, callback|
+ # Invoke callback
+ callback.call(:pass, value.last[:expression])
+ end
+
+ # ## Parser invocation.
+ # On start, yield ourselves if a block is given, otherwise, return this parser instance
+ #
+ # @param [#read, #to_s] input
+ # @param [Hash{Symbol => Object}] options
+ # @option options [Integer] :level
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
+ # @return [EBNF::Parser]
+ def initialize(input, **options, &block)
+ # If the `level` option is set, instantiate a logger for collecting trace information.
+ if options.has_key?(:level)
+ options[:logger] = Logger.new(STDERR)
+ options[:logger].level = options[:level]
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+ end
+
+ # Read input, if necessary, which will be used in a Scanner.
+ @input = input.respond_to?(:read) ? input.read : input.to_s
+
+ parsing_terminals = false
+ @ast = []
+ parse(@input, :ebnf, EBNFMeta::RULES,
+ # Use an optimized Regexp for whitespace
+ whitespace: EBNF::Terminals::PASS,
+ **options
+ ) do |context, *data|
+ rule = case context
+ when :terminals
+ # After parsing `@terminals`
+ # This changes the state of the parser to treat subsequent rules as terminals.
+ parsing_terminals = true
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
+ when :pass
+ # After parsing `@pass`
+ # This defines a specific rule for whitespace.
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass)
+ when :rule
+ # A rule which has already been turned into a `Rule` object.
+ rule = data.first
+ rule.kind = :terminal if parsing_terminals
+ rule
+ end
+ @ast << rule if rule
end
+ rescue EBNF::PEG::Parser::Error => e
+ raise SyntaxError, e.message
end
end
end
\ No newline at end of file
diff --git a/lib/ebnf/peg.rb b/lib/ebnf/peg.rb
index 6422f78..b94d71c 100644
--- a/lib/ebnf/peg.rb
+++ b/lib/ebnf/peg.rb
@@ -31,7 +31,7 @@ def make_peg
def to_ruby_peg(output, **options)
output.puts " RULES = ["
ast.each do |rule|
- output.puts " " + rule.to_ruby + '.extend(EBNF::PEG::Rule),'
+ output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ','
end
output.puts " ]"
end
diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb
index 0393036..48b7ea7 100644
--- a/lib/ebnf/peg/parser.rb
+++ b/lib/ebnf/peg/parser.rb
@@ -51,6 +51,7 @@ def self.included(base)
# DSL for creating terminals and productions
module ClassMethods
def start_handlers; (@start_handlers ||= {}); end
+ def start_options; (@start_hoptions ||= {}); end
def production_handlers; (@production_handlers ||= {}); end
def terminal_handlers; (@terminal_handlers ||= {}); end
def terminal_regexps; (@terminal_regexps ||= {}); end
@@ -97,6 +98,10 @@ def terminal(term, regexp = nil, **options, &block)
#
# @param [Symbol] term
# The rule name
+ # @param [Hash{Symbol => Object}] options
+ # Options which are returned from {Parser#onStart}.
+ # @option options [Boolean] :as_hash (false)
+ # If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence.
# @yield [data, block]
# @yieldparam [Hash] data
# A Hash defined for the current production, during :start
@@ -106,8 +111,9 @@ def terminal(term, regexp = nil, **options, &block)
# Block passed to initialization for yielding to calling parser.
# Should conform to the yield specs for #initialize
# Yield to generate a triple
- def start_production(term, &block)
+ def start_production(term, **options, &block)
start_handlers[term] = block
+ start_options[term] = options.freeze
end
##
@@ -204,6 +210,7 @@ def parse(input = nil, start = nil, rules = nil, **options, &block)
@whitespace = case options[:whitespace]
when Regexp then options[:whitespace]
when Symbol then @rules[options[:whitespace]]
+ else options[:whitespace]
end ||
@rules.values.detect(&:pass?) ||
/(?:\s|(?:#[^x][^\n\r]*))+/m.freeze
@@ -329,19 +336,30 @@ def progress(node, *args, &block)
# @option options [Integer] :depth
# Recursion depth for indenting output
# @yieldreturn [String] additional string appended to `message`.
- def debug(*args)
+ def debug(*args, &block)
return unless @options[:logger]
options = args.last.is_a?(Hash) ? args.pop : {}
lineno = options[:lineno] || (scanner.lineno if scanner)
level = options.fetch(:level, 0)
-
depth = options[:depth] || self.depth
- args << yield if block_given?
- @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
+
+ if self.respond_to?(:log_debug)
+ level = [:debug, :info, :warn, :error, :fatal][level]
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
+ elsif @options[:logger].respond_to?(:add)
+ args << yield if block_given?
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
+ elsif @options[:logger].respond_to?(:<<)
+ args << yield if block_given?
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
+ end
end
# Start for production
# Adds data avoiable during the processing of the production
+ #
+ # @return [Hash] composed of production options. Currently only `as_hash` is supported.
+ # @see ClassMethods#start_production
def onStart(prod)
handler = self.class.start_handlers[prod]
@productions << prod
@@ -367,6 +385,7 @@ def onStart(prod)
# explicit start handler
@prod_data << {}
end
+ return self.class.start_options.fetch(prod, {}) # any options on this production
end
# Finish of production
diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb
index d6681a4..0934bb8 100644
--- a/lib/ebnf/peg/rule.rb
+++ b/lib/ebnf/peg/rule.rb
@@ -18,14 +18,15 @@ module Rule
#
# If matched, the input position is updated and the results returned in a Hash.
#
- # * `alt`: returns the value of the matched production or `:unmatched`
- # * `diff`: returns the string value matched, or `:unmatched`
+ # * `alt`: returns the value of the matched production or `:unmatched`.
+ # * `diff`: returns the value matched, or `:unmatched`.
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`.
- # * `opt`: returns the matched production, or `nil` if unmatched.
- # * `plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
- # * `range`: returns a string composed of the character matching the range, or `:unmatched`.
- # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.
- # * `star`: returns an array of the matches for the specified production.For Terminals, these are concatenated into a single string.
+ # * `opt`: returns the value matched, or `nil` if unmatched.
+ # * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
+ # * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.
+ # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via the `as_hash` option of a `start_production` definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.
+ # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.
+ #
# @param [Scanner] input
# @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.
def parse(input)
@@ -45,7 +46,7 @@ def parse(input)
# otherwise,
if regexp = parser.find_terminal_regexp(sym)
matched = input.scan(regexp)
- result = (matched ? parser.onTerminal(sym, matched) : :unmatched)
+ result = parser.onTerminal(sym, (matched ? matched : :unmatched))
# Update furthest failure for strings and terminals
parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
parser.packrat[sym][pos] = {
@@ -58,7 +59,7 @@ def parse(input)
else
eat_whitespace(input)
end
- parser.onStart(sym)
+ start_options = parser.onStart(sym)
result = case expr.first
when :alt
@@ -84,7 +85,8 @@ def parse(input)
alt
when :diff
# matches any string that matches A but does not match B.
- # XXX: Should this work for arbitrary rules?
+ # (Note, this is only used for Terminal rules, non-terminals will use :not)
+ raise "Diff used on non-terminal #{prod}" unless terminal?
re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
matched = input.scan(re1)
if !matched || re2.match?(matched)
@@ -101,9 +103,9 @@ def parse(input)
parser.update_furthest_failure(input.pos, input.lineno, expr.last)
:unmatched
end
- when :opt
- # Always matches
- opt = case prod = expr[1]
+ when :not
+ # matches any string that does not match B.
+ res = case prod = expr[1]
when Symbol
rule = parser.find_rule(prod)
raise "No rule found for #{prod}" unless rule
@@ -111,35 +113,29 @@ def parse(input)
when String
input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
end
- if opt == :unmatched
+ if res != :unmatched
# Update furthest failure for terminals
- parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal?
- nil
+ parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
+ :unmatched
else
- opt
+ nil
end
+ when :opt
+ # Result is the matched value or nil
+ opt = rept(input, 0, 1, expr[1])
+
+ # Update furthest failure for strings and terminals
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
+ opt.first
when :plus
# Result is an array of all expressions while they match,
# at least one must match
- prod, plus = expr[1], []
- case prod
- when Symbol
- rule = parser.find_rule(prod)
- raise "No rule found for #{prod}" unless rule
- while (res = rule.parse(input)) != :unmatched
- eat_whitespace(input)
- plus << res
- end
- when String
- while res = input.scan(Regexp.new(Regexp.quote(prod)))
- eat_whitespace(input)
- plus << res
- end
- end
+ plus = rept(input, 1, '*', expr[1])
+
# Update furthest failure for strings and terminals
- parser.update_furthest_failure(input.pos, input.lineno, prod)
- plus.empty? ? :unmatched : (terminal? ? plus.compact.join("") : plus.compact)
- when :range
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
+ plus.is_a?(Array) && terminal? ? plus.join("") : plus
+ when :range, :istr
# Matches the specified character range
input.scan(to_regexp) || begin
# Update furthest failure for strings and terminals
@@ -149,7 +145,7 @@ def parse(input)
when :seq
# Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
- eat_whitespace(input) unless accumulator.empty?
+ eat_whitespace(input) unless accumulator.empty? || terminal?
res = case prod
when Symbol
rule = parser.find_rule(prod)
@@ -165,32 +161,23 @@ def parse(input)
end
accumulator << {prod.to_sym => res}
end
- seq == :unmatched ?
- :unmatched :
- (terminal? ?
- seq.map(&:values).compact.join("") : # Concat values for terminal production
- seq)
+ if seq == :unmatched
+ :unmatched
+ elsif terminal?
+ seq.map(&:values).compact.join("") # Concat values for terminal production
+ elsif start_options[:as_hash]
+ seq.inject {|memo, h| memo.merge(h)}
+ else
+ seq
+ end
when :star
# Result is an array of all expressions while they match,
# an empty array of none match
- prod, star = expr[1], []
- case prod
- when Symbol
- rule = parser.find_rule(prod)
- raise "No rule found for #{prod}" unless rule
- while (res = rule.parse(input)) != :unmatched
- eat_whitespace(input)
- star << res
- end
- when String
- while res = input.scan(Regexp.new(Regexp.quote(prod)))
- eat_whitespace(input)
- star << res
- end
- end
+ star = rept(input, 0, '*', expr[1])
+
# Update furthest failure for strings and terminals
- parser.update_furthest_failure(input.pos, input.lineno, prod)
- star.compact
+ parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
+ star.is_a?(Array) && terminal? ? star.join("") : star
else
raise "attempt to parse unknown rule type: #{expr.first}"
end
@@ -208,6 +195,38 @@ def parse(input)
return parser.packrat[sym][pos][:result]
end
+ ##
+ # Repetition, 0-1, 0-n, 1-n, ...
+ #
+ # Note, nil results are removed from the result, but count towards min/max calculations
+ #
+ # @param [Scanner] input
+ # @param [Integer] min
+ # @param [Integer, String] max
+ # If it is an integer, it stops matching after max entries; the string `'*'` means no upper bound.
+ # @param [Symbol, String] prod
+ # @return [:unmatched, Array]
+ def rept(input, min, max, prod)
+ result = []
+
+ case prod
+ when Symbol
+ rule = parser.find_rule(prod)
+ raise "No rule found for #{prod}" unless rule
+ while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
+ eat_whitespace(input) unless terminal?
+ result << res
+ end
+ when String
+ while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max)
+ eat_whitespace(input) unless terminal?
+ result << res
+ end
+ end
+
+ result.length < min ? :unmatched : result.compact
+ end
+
##
# Eat whitespace between non-terminal rules
def eat_whitespace(input)
diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb
index ef53c0c..f01e9c5 100644
--- a/lib/ebnf/rule.rb
+++ b/lib/ebnf/rule.rb
@@ -1,17 +1,33 @@
require 'scanf'
+require 'strscan'
module EBNF
# Represent individual parsed rules
class Rule
# Operations which are flattened to seprate rules in to_bnf.
BNF_OPS = %w{
- alt opt plus seq star
+ alt diff not opt plus rept seq star
}.map(&:to_sym).freeze
TERM_OPS = %w{
- diff hex range
+ hex istr range
}.map(&:to_sym).freeze
+ # The number of arguments expected per operator. `nil` for unspecified
+ OP_ARGN = {
+ alt: nil,
+ diff: 2,
+ hex: 1,
+ istr: 1,
+ not: 1,
+ opt: 1,
+ plus: 1,
+ range: 1,
+ rept: 3,
+ seq: nil,
+ star: 1
+ }
+
# Symbol of rule
#
# @return [Symbol]
@@ -28,7 +44,7 @@ class Rule
# Kind of rule
#
- # @return [:rule, :terminal, or :pass]
+ # @return [:rule, :terminal, :terminals, or :pass]
attr_accessor :kind
# Rule expression
@@ -59,19 +75,38 @@ class Rule
# Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF
attr_accessor :cleanup
- # @param [Symbol] sym
- # @param [Integer] id
+ # @param [Symbol, nil] sym
+ # `nil` is allowed only for @pass or @terminals
+ # @param [Integer, nil] id
# @param [Array] expr
- # @param [Symbol] kind (nil)
+ # The expression is an internal-representation of an S-Expression with one of the following operators:
+ #
+ # * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found.
+ # * `diff` – matches any string that matches `A` but does not match `B`.
+ # * `hex` – A single character represented using the hexadecimal notation `#xnn`.
+ # * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination.
+ # * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`.
+ # * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input.
+ # * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation.
+ # * `rept m n` – A sequence of at least `m` and at most `n` of the matching rule. It will always return an array.
+ # * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched.
+ # * `star` – A sequence of zero or more of the matching rule. It will always return an array.
+ # @param [:rule, :terminal, :terminals, :pass] kind (nil)
# @param [String] ebnf (nil)
+ # When parsing, records the EBNF string used to create the rule.
# @param [Array] first (nil)
+ # Recorded set of terminals that can begin this rule (LL(1))
# @param [Array] follow (nil)
+ # Recorded set of terminals that can follow this rule (LL(1))
# @param [Boolean] start (nil)
+ # Is this the starting rule for the grammar?
# @param [Rule] top_rule (nil)
+ # The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule.
# @param [Boolean] cleanup (nil)
+ # Records information useful for cleaning up converted :plus, and :star expansions (LL(1)).
def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil)
@sym, @id = sym, id
- @expr = expr.is_a?(Array) ? expr : [:seq, expr]
+ @expr = expr.is_a?(Array) ? expr : [:seq, expr].compact
@ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule
@top_rule ||= self
@kind ||= case
@@ -79,21 +114,53 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta
when !BNF_OPS.include?(@expr.first) then :terminal
else :rule
end
+
+ # Allow @pass and @terminals to not be named
+ @sym ||= :_pass if @kind == :pass
+ @sym ||= :_terminals if @kind == :terminals
+
+ raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol)
+ raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String)
+ raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless
+ @kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind)
+
+ case @expr.first
+ when :alt
+ raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1
+ when :diff
+ raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3
+ when :hex, :istr, :not, :opt, :plus, :range, :star
+ raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2
+ when :rept
+ raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless
+ @expr[1].is_a?(Integer) && @expr[1] >= 0
+ raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless
+ @expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0
+ when :seq
+ # It's legal to have a zero-length sequence
+ else
+ raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}"
+ end
end
##
# Return a rule from its SXP representation:
#
# @example inputs
- # (pass (plus (range "#x20\\t\\r\\n")))
+ # (pass _pass (plus (range "#x20\\t\\r\\n")))
# (rule ebnf "1" (star (alt declaration rule)))
- # (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))
+ # (terminal R_CHAR "19" (diff CHAR (alt "]" "-")))
#
# Also may have `(first ...)`, `(follow ...)`, or `(start #t)`.
#
- # @param [Array] sxp
+ # @param [String, Array] sxp
# @return [Rule]
def self.from_sxp(sxp)
+ if sxp.is_a?(String)
+ require 'sxp' unless defined?(SXP)
+ sxp = SXP.parse(sxp)
+ end
expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)}
first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first}
first = first[1..-1] if first
@@ -115,11 +182,11 @@ def self.from_sxp(sxp)
# @param [Hash{Symbol => Symbol}] cleanup (nil)
# @param [Hash{Symbol => Object}] options
def build(expr, kind: nil, cleanup: nil, **options)
- new_sym, new_id = (@top_rule ||self).send(:make_sym_id)
+ new_sym, new_id = @top_rule.send(:make_sym_id)
self.class.new(new_sym, new_id, expr,
kind: kind,
ebnf: @ebnf,
- top_rule: (@top_rule || self),
+ top_rule: @top_rule,
cleanup: cleanup,
**options)
end
@@ -152,15 +219,16 @@ def to_sxp
# @return [String]
def to_ttl
@ebnf.debug("to_ttl") {inspect} if @ebnf
- comment = orig.to_s.strip.
- gsub(/"""/, '\"\"\"').
- gsub("\\", "\\\\").
- sub(/^\"/, '\"').
- sub(/\"$/m, '\"')
- statements = [
- %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";},
- %{ rdfs:comment #{comment.inspect};},
- ]
+ statements = [%{:#{sym} rdfs:label "#{sym}";}]
+ if orig
+ comment = orig.to_s.strip.
+ gsub(/"""/, '\"\"\"').
+ gsub("\\", "\\\\").
+ sub(/^\"/, '\"').
+ sub(/\"$/m, '\"')
+ statements << %{ rdfs:comment #{comment.inspect};}
+ end
+ statements << %{ dc:identifier "#{id}";} if id
statements += ttl_expr(expr, terminal? ? "re" : "g", 1, false)
"\n" + statements.join("\n")
@@ -175,12 +243,13 @@ def to_ruby
##
# Transform EBNF rule to BNF rules:
#
- # * Transform (rule a "n" (op1 (op2))) into two rules:
- # (rule a "n" (op1 _a_1))
- # (rule _a_1 "n.1" (op2))
- # * Transform (rule a (opt b)) into (rule a (alt _empty b))
- # * Transform (rule a (star b)) into (rule a (alt _empty (seq b a)))
- # * Transform (rule a (plus b)) into (rule a (seq b (star b)
+ # * Transform `(rule a "n" (op1 (op2)))` into two rules:
+ #
+ # (rule a "n" (op1 _a_1))
+ # (rule _a_1 "n.1" (op2))
+ # * Transform `(rule a (opt b))` into `(rule a (alt _empty b))`
+ # * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))`
+ # * Transform `(rule a (plus b))` into `(rule a (seq b (star b)))`
#
# Transformation includes information used to re-construct non-transformed.
#
@@ -231,7 +300,7 @@ def to_bnf
# Otherwise, no further transformation necessary
new_rules << self
elsif [:diff, :hex, :range].include?(expr.first)
- # This rules are fine, the just need to be terminals
+ # These rules are fine, they just need to be terminals
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
new_rules << self
else
@@ -245,9 +314,14 @@ def to_bnf
##
# Transform EBNF rule for PEG:
#
- # * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules:
- # (rule a "n" (op1 ... _a_1 ... z))
- # (rule _a_1 "n.1" (op2 y))
+ # * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules:
+ #
+ # (rule a "n" (op1 ... _a_1 ... z))
+ # (rule _a_1 "n.1" (op2 y))
+ # * Transform `(rule a "n" (diff op1 op2))` into two rules:
+ #
+ # (rule a "n" (seq _a_1 op1))
+ # (rule _a_1 "n.1" (not op2))
#
# @return [Array]
def to_peg
@@ -268,8 +342,14 @@ def to_peg
# Return new rules after recursively applying #to_bnf
new_rules = new_rules.map {|r| r.to_peg}.flatten
- elsif [:diff, :hex, :range].include?(expr.first)
- # This rules are fine, the just need to be terminals
+ elsif expr.first == :diff && !terminal?
+ this = dup
+ new_rule = build([:not, expr[2]])
+ this.expr = [:seq, new_rule.sym, expr[1]]
+ new_rules << this
+ new_rules << new_rule
+ elsif [:hex, :istr, :range].include?(expr.first)
+ # These rules are fine, they just need to be terminals
raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal?
new_rules << self
else
@@ -287,6 +367,8 @@ def to_regexp
case expr.first
when :hex
Regexp.new(translate_codepoints(expr[1]))
+ when :istr
+ /#{expr.last}/ui
when :range
Regexp.new("[#{translate_codepoints(expr[1])}]")
else
@@ -294,45 +376,170 @@ def to_regexp
end
end
- # Return the non-terminals for this rule. For seq, this is the first
- # non-terminal in the sequence. For alt, this is every non-terminal in the alt.
+ # Is this a terminal?
+ #
+ # @return [Boolean]
+ def terminal?
+ kind == :terminal
+ end
+
+ # Is this a pass?
+ # @return [Boolean]
+ def pass?
+ kind == :pass
+ end
+
+ # Is this a rule?
+ # @return [Boolean]
+ def rule?
+ kind == :rule
+ end
+
+ # Is this rule of the form (alt ...)?
+ def alt?
+ expr.is_a?(Array) && expr.first == :alt
+ end
+
+ # Is this rule of the form (seq ...)?
+ def seq?
+ expr.is_a?(Array) && expr.first == :seq
+ end
+
+ def inspect
+ "#"
+ end
+
+ # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
+ #
+ # @param [Rule] other
+ # @return [Boolean]
+ def ==(other)
+ sym == other.sym &&
+ kind == other.kind &&
+ expr == other.expr
+ end
+
+ # Two rules are equivalent if they have the same {#expr}.
+ #
+ # @param [Rule] other
+ # @return [Boolean]
+ def eql?(other)
+ expr == other.expr
+ end
+
+ # Rules compare using their ids
+ def <=>(other)
+ if id && other.id
+ if id == other.id
+ id.to_s <=> other.id.to_s
+ else
+ id.to_f <=> other.id.to_f
+ end
+ else
+ sym.to_s <=> other.sym.to_s
+ end
+ end
+
+ ##
+ # Utility function to translate code points of the form '#xN' into ruby unicode characters
+ def translate_codepoints(str)
+ str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
+ end
+
+ # Return the non-terminals for this rule.
+ #
+ # * `alt` => this is every non-terminal.
+ # * `diff` => this is every non-terminal.
+ # * `hex` => nil
+ # * `istr` => nil
+ # * `not` => this is the last expression, if any.
+ # * `opt` => this is the last expression, if any.
+ # * `plus` => this is the last expression, if any.
+ # * `range` => nil
+ # * `rept` => this is the last expression, if any.
+ # * `seq` => this is the first expression in the sequence, if any.
+ # * `star` => this is the last expression, if any.
#
# @param [Array] ast
# The set of rules, used to turn symbols into rules
+ # @param [Array] expr (@expr)
+ # The expression to check, defaults to the rule expression.
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
# @return [Array]
- def non_terminals(ast)
- @non_terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
+ # @note this is used for LL(1) transformation, so rule types are limited
+ def non_terminals(ast, expr = @expr)
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
case sym
when Symbol
r = ast.detect {|r| r.sym == sym}
r if r && r.rule?
+ when Array
+ non_terminals(ast, sym)
else
nil
end
- end.compact
+ end.flatten.compact.uniq
end
- # Return the terminals for this rule. For seq, this is the first
- # terminals or strings in the seq. For alt, this is every non-terminal ni the alt.
+ # Return the terminals for this rule.
+ #
+ # * `alt` => this is every terminal.
+ # * `diff` => this is every terminal.
+ # * `hex` => nil
+ # * `istr` => nil
+ # * `not` => this is the last expression, if any.
+ # * `opt` => this is the last expression, if any.
+ # * `plus` => this is the last expression, if any.
+ # * `range` => nil
+ # * `rept` => this is the last expression, if any.
+ # * `seq` => this is the first expression in the sequence, if any.
+ # * `star` => this is the last expression, if any.
#
# @param [Array] ast
# The set of rules, used to turn symbols into rules
+ # @param [Array] expr (@expr)
+ # The expression to check, defaults to the rule expression.
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
# @return [Array]
- def terminals(ast)
- @terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym|
+ # @note this is used for LL(1) transformation, so rule types are limited
+ def terminals(ast, expr = @expr)
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym|
case sym
when Symbol
r = ast.detect {|r| r.sym == sym}
r if r && r.terminal?
when String
sym
- else
- nil
+ when Array
+ terminals(ast, sym)
end
- end.compact
+ end.flatten.compact.uniq
end
- # Does this rule start with a sym? It does if expr is that sym,
+ # Return the symbols used in the rule.
+ #
+ # @param [Array] expr (@expr)
+ # The expression to check, defaults to the rule expression.
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
+ # @return [Array]
+ def symbols(expr = @expr)
+ expr[1..-1].map do |sym|
+ case sym
+ when Symbol
+ sym
+ when Array
+ symbols(sym)
+ end
+ end.flatten.compact.uniq
+ end
+
+ ##
+ # The following are used for LL(1) transformation.
+ ##
+
+ # Does this rule start with `sym`? It does if expr is that sym,
# expr starts with alt and contains that sym,
# or expr starts with seq and the next element is that sym.
#
@@ -349,6 +556,92 @@ def starts_with?(sym)
end
end
+ ##
+ # Validate the rule, with respect to an AST.
+ #
+ # @param [Array] ast
+ # The set of rules, used to turn symbols into rules
+ # @param [Array] expr (@expr)
+ # The expression to check, defaults to the rule expression.
+ # Typically, if the expression is recursive, the embedded expression is called recursively.
+ # @raise [SyntaxError]
+ def validate!(ast, expr = @expr)
+ op = expr.first
+ raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op)
+ raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if
+ OP_ARGN[op] && OP_ARGN[op] != expr.length - 1
+
+ # alt needs at least one operand; rept needs a valid min and max
+ if op == :alt
+ raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1
+ elsif op == :rept
+ raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless
+ expr[1].is_a?(Integer) && expr[1] >= 0
+ raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless
+ expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0
+ end
+
+ case op
+ when :hex
+ raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/)
+ when :range
+ str = expr.last.dup
+ str = str[1..-1] if str.start_with?('^')
+ str = str[0..-2] if str.end_with?('-') # Allowed at end of range
+ scanner = StringScanner.new(str)
+ hex = rchar = in_range = false
+ while !scanner.eos?
+ begin
+ if scanner.scan(Terminals::HEX)
+ raise SyntaxError if in_range && rchar
+ rchar = in_range = false
+ hex = true
+ elsif scanner.scan(Terminals::R_CHAR)
+ raise SyntaxError if in_range && hex
+ hex = in_range = false
+ rchar = true
+ else
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
+ end
+
+ if scanner.scan(/\-/)
+ raise SyntaxError if in_range
+ in_range = true
+ end
+ rescue SyntaxError
+ raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}")
+ end
+ end
+ else
+ ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).each do |sym|
+ case sym
+ when Symbol
+ r = ast.detect {|r| r.sym == sym}
+ raise SyntaxError, "No rule found for #{sym}" unless r
+ when Array
+ validate!(ast, sym)
+ when String
+ raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/)
+ end
+ end
+ end
+ end
+
+ ##
+ # Validate the rule, with respect to an AST.
+ #
+ # Uses `#validate!` and catches `SyntaxError`
+ #
+ # @param [Array] ast
+ # The set of rules, used to turn symbols into rules
+ # @return [Boolean]
+ def valid?(ast)
+ validate!(ast)
+ true
+ rescue SyntaxError
+ false
+ end
+
# Do the firsts of this rule include the empty string?
#
# @return [Boolean]
@@ -381,79 +674,6 @@ def add_follow(terminals)
terminals.length
end
- # Is this a terminal?
- #
- # @return [Boolean]
- def terminal?
- kind == :terminal
- end
-
- # Is this a pass?
- # @return [Boolean]
- def pass?
- kind == :pass
- end
-
- # Is this a rule?
- # @return [Boolean]
- def rule?
- kind == :rule
- end
-
- # Is this rule of the form (alt ...)?
- def alt?
- expr.is_a?(Array) && expr.first == :alt
- end
-
- # Is this rule of the form (seq ...)?
- def seq?
- expr.is_a?(Array) && expr.first == :seq
- end
-
- # Is this rule of the form (alt ...)?
- def alt?
- expr.is_a?(Array) && expr.first == :alt
- end
-
- def inspect
- "#"
- end
-
- # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}.
- #
- # @param [Rule] other
- # @return [Boolean]
- def ==(other)
- sym == other.sym &&
- kind == other.kind &&
- expr == other.expr
- end
-
- # Two rules are equivalent if they have the same {#expr}.
- #
- # @param [Rule] other
- # @return [Boolean]
- def equivalent?(other)
- expr == other.expr
- end
-
- # Rules compare using their ids
- def <=>(other)
- if id.to_i == other.id.to_i
- id.to_s <=> other.id.to_s
- else
- id.to_i <=> other.id.to_i
- end
- end
-
- ##
- # Utility function to translate code points of the form '#xN' into ruby unicode characters
- def translate_codepoints(str)
- str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)}
- end
-
private
def ttl_expr(expr, pfx, depth, is_obj = true)
indent = ' ' * depth
@@ -469,17 +689,28 @@ def ttl_expr(expr, pfx, depth, is_obj = true)
case op
when :seq, :alt, :diff
+ # Multiple operands
statements << %{#{indent}#{bra}#{pfx}:#{op} (}
expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)}
statements << %{#{indent} )#{ket}}
- when :opt, :plus, :star
+ when :opt, :plus, :star, :not
+ # Single operand
statements << %{#{indent}#{bra}#{pfx}:#{op} }
statements += ttl_expr(expr.first, pfx, depth + 1)
statements << %{#{indent} #{ket}} unless ket.empty?
+ when :rept
+ # Three operands (min, max and expr)
+ statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};}
+ statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};}
+ statements << %{#{indent}#{bra}#{pfx}:#{op} }
+ statements += ttl_expr(expr.last, pfx, depth + 1)
+ statements << %{#{indent} #{ket}} unless ket.empty?
when :_empty, :_eps
statements << %{#{indent}"g:#{op.to_s[1..-1]}"}
when :"'"
statements << %{#{indent}"#{esc(expr)}"}
+ when :istr
+ statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}}
when :range
statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}}
when :hex
@@ -535,7 +766,7 @@ def cclass(txt)
def make_sym_id(variation = nil)
@id_seq ||= 0
@id_seq += 1
- ["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"]
+ ["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)]
end
end
end
\ No newline at end of file
diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb
index 68e969b..24e498b 100644
--- a/lib/ebnf/terminals.rb
+++ b/lib/ebnf/terminals.rb
@@ -1,18 +1,21 @@
# encoding: utf-8
# Terminal definitions for the EBNF grammar
module EBNF::Terminals
- SYMBOL = %r([a-zA-Z0-9_\.]+)u.freeze
- HEX = %r(\#x[a-fA-F0-9]+)u.freeze
+ SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze
+ SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze
+ HEX = %r(\#x\h+)u.freeze
CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
- R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
- RANGE = %r(\[(?:(?:#{R_CHAR})\-(?:#{R_CHAR})|(?:#{HEX})-(?:#{HEX}))\])u.freeze
- ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze
- ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}))u.freeze
- LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze
- O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze
- O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze
+ R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze
+ RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze
+ LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze
+ O_RANGE = %r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze
STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze
STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze
POSTFIX = %r([?*+])u.freeze
- PASS = %r((\s|(?:(#[^x]|//)[^\n\r]*$)|(?:/\*(?:(?:\*[^/])|[^*])*\*/))+)mu.freeze
+ PASS = %r((
+ \s
+ | (?:(?:\#[^x]|//)[^\n\r]*)
+ | (?:/\*(?:(?:\*[^/])|[^*])*\*/)
+ | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\))
+ )+)xmu.freeze
end
diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb
index ba5aabc..e64cf62 100644
--- a/lib/ebnf/writer.rb
+++ b/lib/ebnf/writer.rb
@@ -1,6 +1,7 @@
# -*- encoding: utf-8 -*-
require 'rdf'
require 'strscan' unless defined?(StringScanner)
+require "ostruct"
##
# Serialize ruleset back to EBNF
@@ -8,15 +9,53 @@ module EBNF
class Writer
LINE_LENGTH = 80
+ # ASCII escape names
+ ASCII_ESCAPE_NAMES = [
+ "null", #x00
+ "start of heading", #x01
+ "start of text", #x02
+ "end of text", #x03
+ "end of transmission", #x04
+ "enquiry", #x05
+ "acknowledge", #x06
+ "bell", #x07
+ "backspace", #x08
+ "horizontal tab", #x09
+ "new line", #x0A
+ "vertical tab", #x0B
+ "form feed", #x0C
+ "carriage return", #x0D
+ "shift out", #x0E
+ "shift in", #x0F
+ "data link escape", #x10
+ "device control 1", #x11
+ "device control 2", #x12
+ "device control 3", #x13
+ "device control 4", #x14
+ "negative acknowledge", #x15
+ "synchronous idle", #x16
+ "end of trans. block", #x17
+ "cancel", #x18
+ "end of medium", #x19
+ "substitute", #x1A
+ "escape", #x1B
+ "file separator", #x1C
+ "group separator", #x1D
+ "record separator", #x1E
+ "unit separator", #x1F
+ "space" #x20
+ ]
+
##
# Format rules to a String
#
# @param [Array] rules
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [Object]
- def self.string(*rules)
+ def self.string(*rules, format: :ebnf)
require 'stringio' unless defined?(StringIO)
buf = StringIO.new
- write(buf, *rules)
+ write(buf, *rules, format: format)
buf.string
end
@@ -24,9 +63,10 @@ def self.string(*rules)
# Format rules to $stdout
#
# @param [Array] rules
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [Object]
- def self.print(*rules)
- write($stdout, *rules)
+ def self.print(*rules, format: :ebnf)
+ write($stdout, *rules, format: format)
end
##
@@ -34,20 +74,22 @@ def self.print(*rules)
#
# @param [Object] out
# @param [Array] rules
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [Object]
- def self.write(out, *rules)
- Writer.new(rules, out: out)
+ def self.write(out, *rules, format: :ebnf)
+ Writer.new(rules, out: out, format: format)
end
##
# Write formatted rules to an IO like object as HTML
#
# @param [Array] rules
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @return [Object]
- def self.html(*rules)
+ def self.html(*rules, format: :ebnf)
require 'stringio' unless defined?(StringIO)
buf = StringIO.new
- Writer.new(rules, out: buf, html: true)
+ Writer.new(rules, out: buf, html: true, format: format)
buf.string
end
@@ -55,17 +97,24 @@ def self.html(*rules)
# @param [Array] rules
# @param [Hash{Symbol => Object}] options
# @param [#write] out ($stdout)
+ # @param [:abnf, :ebnf, :isoebnf] format (:ebnf)
# @option options [Symbol] format
# @option options [Boolean] html (false)
- def initialize(rules, out: $stdout, html: false, **options)
- @options = options.dup
+ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options)
+ @options = options.merge(html: html)
+ return if rules.empty?
# Determine max LHS length
+ format_meth = "format_#{format}".to_sym
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
- lhs_length = max_sym + 3
- lhs_fmt = "%-#{max_sym}s ::= "
- if max_id > 0
+ lhs_length = max_sym + 1
+ lhs_fmt = case format
+ when :abnf then "%-#{max_sym}s = "
+ when :ebnf then "%-#{max_sym}s ::= "
+ when :isoebnf then "%-#{max_sym}s = "
+ end
+ if format == :ebnf && max_id > 0
lhs_fmt = "%-#{max_id+2}s " + lhs_fmt
lhs_length += max_id + 3
end
@@ -74,49 +123,104 @@ def initialize(rules, out: $stdout, html: false, **options)
if html
# Output as formatted HTML
begin
- require 'haml'
- hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
- formatted_expr = format(rule.expr)
- formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
- end
- out.write hout
+ require 'erubis'
+ eruby = Erubis::Eruby.new(ERB_DESC)
+ formatted_rules = rules.map do |rule|
+ if rule.kind == :terminals || rule.kind == :pass
+ OpenStruct.new(id: ("@#{rule.kind}"),
+ sym: nil,
+ assign: nil,
+ formatted: ("Productions for terminals" if rule.kind == :terminals))
+ else
+ formatted_expr = self.send(format_meth, rule.expr)
+ # Measure text without markup
+ formatted_expr_text = formatted_expr.gsub(%r{?\w+[^>]*>}, '')
+ if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?)
+ lines = []
+ # Can only reasonably split apart alts
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
+ assign = case format
+ when :ebnf
+ formatted.sub!(%r{\s*\| \s*}, '')
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '::=')
+ when :abnf
+ formatted.sub!(%r{\s*/ \s*}, '')
+ (ndx > 0 ? '=/' : '=')
+ else
+ formatted.sub!(%r{\s*\| \s*}, '')
+ (ndx > 0 ? (rule.alt? ? '|' : '') : '=')
+ end
+ lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
+ sym: (rule.sym if ndx == 0 || format == :abnf),
+ assign: assign,
+ formatted: formatted)
+ end
+ if format == :isoebnf
+ lines << OpenStruct.new(assign: ';')
+ end
+ lines
+ else
+ OpenStruct.new(id: ("[#{rule.id}]" if rule.id),
+ sym: rule.sym,
+ assign: (format == :ebnf ? '::=' : '='),
+ formatted: (formatted_expr + (format == :isoebnf ? ' ;' : '')))
+ end
+ end
+ end.flatten
+ out.write eruby.evaluate(format: format, rules: formatted_rules)
return
rescue LoadError
- $stderr.puts "Generating HTML requires haml gem to be loaded"
+ $stderr.puts "Generating HTML requires erubis gem to be loaded"
end
end
# Format each rule, considering the available rhs size
rules.each do |rule|
buffer = if rule.pass?
- "%-#{lhs_length-2}s" % "@pass"
+ "\n%-#{lhs_length-2}s " % "@pass"
+ elsif rule.kind == :terminals
+ "\n%-#{lhs_length-2}s" % "@terminals"
else
lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
end
- formatted_expr = format(rule.expr)
- if formatted_expr.length > rhs_length
- buffer << format(rule.expr, ("\n" + " " * lhs_length))
+ formatted_expr = self.send(format_meth, rule.expr)
+ if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?)
+ if format == :abnf
+ # No whitespace, use =/
+ self.send(format_meth, rule.expr, sep: "--rule-extensions--").
+ split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx|
+ if ndx > 0
+ buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym}
+ end
+ buffer << formatted.sub(/\s*\/\s*/, '')
+ end
+ else
+ # Space out past "= "
+ buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2))))
+ buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf
+ end
else
- buffer << formatted_expr
+ buffer << formatted_expr + (format == :isoebnf ? ' ;' : '')
end
+ buffer << "\n\n" if [:terminals, :pass].include?(rule.kind)
out.puts(buffer)
end
end
protected
+
+ ##
+ # W3C EBNF Formatters
+ ##
+
# Format the expression part of a rule
- def format(expr, sep = nil)
+ def format_ebnf(expr, sep: nil, embedded: false)
return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol)
if expr.is_a?(String)
- if expr.length == 1
- return format_char(expr)
- elsif expr =~ /\A#x\h+/
- return (@options[:html] ? %(#{expr} ) : expr)
- elsif expr =~ /"/
- return (@options[:html] ? %('#{escape(expr, "'")} ') : %('#{escape(expr, "'")}'))
- else
- return (@options[:html] ? %("#{escape(expr, '"')} ") : %("#{escape(expr, '"')}"))
- end
+ return expr.length == 1 ?
+ format_ebnf_char(expr) :
+ format_ebnf_string(expr, expr.include?('"') ? "'" : '"')
end
parts = {
alt: (@options[:html] ? "| " : "| "),
@@ -129,40 +233,75 @@ def format(expr, sep = nil)
rparen = (@options[:html] ? ") " : ")")
case expr.first
+ when :istr
+ # Looses fidelity, but, oh well ...
+ format_ebnf(expr.last, embedded: true)
when :alt, :diff
this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
- expr[1..-1].map {|e| format(e)}.join(this_sep)
+ res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
when :star, :plus, :opt
- raise "Expected star expression to have a single operand" unless expr.length == 2
char = parts[expr.first.to_sym]
- r = format(expr[1])
- (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
+ r = format_ebnf(expr[1], embedded: true)
+ "#{r}#{char}"
when :hex
- (@options[:html] ? %(#{expr.last} ) : expr.last)
+ escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8))
when :range
- format_range(expr.last)
+ format_ebnf_range(expr.last)
when :seq
this_sep = (sep ? sep : " ")
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
+ res = expr[1..-1].map do |e|
+ format_ebnf(e, embedded: true)
+ end.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
+ when :rept
+ # Expand repetition
+ min, max, value = expr[1..-1]
+ if min == 0 && max == 1
+ format_ebnf([:opt, value], sep: sep, embedded: embedded)
+ elsif min == 0 && max == '*'
+ format_ebnf([:star, value], sep: sep, embedded: embedded)
+ elsif min == 1 && max == '*'
+ format_ebnf([:plus, value], sep: sep, embedded: embedded)
+ else
+ val2 = [:seq]
+ while min > 0
+ val2 << value
+ min -= 1
+ max -= 1 unless max == '*'
+ end
+ if max == '*'
+ val2 << [:star, value]
+ else
+ opt = nil
+ while max > 0
+ opt = [:opt, opt ? [:seq, value, opt] : value]
+ max -= 1
+ end
+ val2 << opt if opt
+ end
+ format_ebnf(val2, sep: sep, embedded: embedded)
+ end
else
raise "Unknown operator: #{expr.first}"
end
end
# Format a single-character string, prefering hex for non-main ASCII
- def format_char(c)
+ def format_ebnf_char(c)
case c.ord
- when 0x22 then (@options[:html] ? %('" ') : %{'"'})
- when (0x23..0x7e) then (@options[:html] ? %("#{c} ") : %{"#{c}"})
- else (@options[:html] ? %(#{escape_hex(c)} ) : escape_hex(c))
+ when (0x21) then (@options[:html] ? %("#{c} ") : %{"#{c}"})
+ when 0x22 then (@options[:html] ? %('" ') : %{'"'})
+ when (0x23..0x7e) then (@options[:html] ? %("#{c} ") : %{"#{c}"})
+ when (0x80..0xFFFD) then (@options[:html] ? %("#{c} ") : %{"#{c}"})
+ else escape_ebnf_hex(c)
end
end
# Format a range
- def format_range(string)
+ def format_ebnf_range(string)
lbrac = (@options[:html] ? "[ " : "[")
rbrac = (@options[:html] ? "] " : "]")
- dash = (@options[:html] ? "- " : "-")
buffer = lbrac
s = StringScanner.new(string)
@@ -171,53 +310,386 @@ def format_range(string)
when s.scan(/\A[!"\u0024-\u007e]+/)
buffer << (@options[:html] ? %(#{s.matched} ) : s.matched)
when s.scan(/\A#x\h+/)
- buffer << (@options[:html] ? %(#{s.matched} ) : s.matched)
- when s.scan(/\A-/)
- buffer << dash
+ buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8))
else
- buffer << (@options[:html] ? %(#{escape_hex(s.getch)} ) : escape_hex(s.getch))
+ buffer << escape_ebnf_hex(s.getch)
end
end
buffer + rbrac
end
# Escape a string, using as many UTF-8 characters as possible
- def escape(string, quote = '"')
- buffer = ""
+ def format_ebnf_string(string, quote = '"')
string.each_char do |c|
- buffer << case (u = c.ord)
- when (0x00..0x1f) then "#x%02X" % u
- when quote.ord then "#x%02X" % u
- else c
+ case c.ord
+ when 0x00..0x19, quote.ord
+ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
end
end
- buffer
+
+ "#{quote}#{string}#{quote}"
end
- def escape_hex(u)
+ def escape_ebnf_hex(u)
fmt = case u.ord
+ when 0x00..0x20 then "#x%02X"
when 0x0000..0x00ff then "#x%02X"
when 0x0100..0xffff then "#x%04X"
else "#x%08X"
end
- sprintf(fmt, u.ord)
- end
-
- HAML_DESC = %q(
- %table.grammar
- %tbody#grammar-productions
- - rules.each do |rule|
- %tr{id: "grammar-production-#{rule.sym}"}
- - if rule.pass?
- %td{colspan: 3}
- %code<="@pass"
- - else
- %td<= "[#{rule.id}]"
- %td<
- %code<= rule.sym
- %td<= "::="
- %td
- != yield rule
+ char = fmt % u.ord
+ if @options[:html]
+ if u.ord <= 0x20
+ char = %(#{char})
+ elsif u.ord < 0x7F
+ char = %(#{char})
+ elsif u.ord == 0x7F
+ char = %(#{char})
+ elsif u.ord <= 0xFF
+ char = %(#{char})
+ else
+ char = %(#{char})
+ end
+ %(#{char} )
+ else
+ char
+ end
+ end
+
+ ##
+ # ABNF Formatters
+ ##
+
+ # Format the expression part of a rule
+ def format_abnf(expr, sep: nil, embedded: false, sensitive: true)
+ return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol)
+ if expr.is_a?(String)
+ if expr.length == 1
+ return format_abnf_char(expr)
+ elsif expr.start_with?('%')
+ # Already encoded
+ return expr
+ elsif expr =~ /"/
+ # Split into segments
+ segments = expr.split('"')
+
+ return format_abnf_char(expr) if segments.empty?
+
+ seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1]
+ seq.unshift(:seq)
+ return format_abnf(seq, sep: nil, embedded: false)
+ else
+ return (@options[:html] ? %("#{'%s' if sensitive}#{expr} ") : %(#{'%s' if sensitive}"#{expr}"))
+ end
+ end
+ parts = {
+ alt: (@options[:html] ? "/ " : "/ "),
+ star: (@options[:html] ? "* " : "*"),
+ plus: (@options[:html] ? "+ " : "1*"),
+ opt: (@options[:html] ? "? " : "?")
+ }
+ lbrac = (@options[:html] ? "[ " : "[")
+ rbrac = (@options[:html] ? "] " : "]")
+ lparen = (@options[:html] ? "( " : "(")
+ rparen = (@options[:html] ? ") " : ")")
+
+ case expr.first
+ when :istr
+ # FIXME: if string part is segmented, need to do something different
+ format_abnf(expr.last, embedded: true, sensitive: false)
+ when :alt
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
+ res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
+ when :diff
+ raise RangeError, "ABNF does not support the diff operator"
+ when :opt
+ char = parts[expr.first.to_sym]
+ r = format_abnf(expr[1], embedded: true)
+ "#{lbrac}#{r}#{rbrac}"
+ when :plus, :star
+ char = parts[expr.first.to_sym]
+ r = format_abnf(expr[1], embedded: true)
+ "#{char}#{r}"
+ when :hex
+ escape_abnf_hex(expr.last[2..-1].hex.chr)
+ when :range
+ # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences
+ # Note: ABNF does not support the `not` operator
+ res = format_abnf_range(expr.last)
+ res.is_a?(Array) ?
+ format_abnf(res, embedded: true) :
+ res
+ when :seq
+ this_sep = (sep ? sep : " ")
+ res = expr[1..-1].map do |e|
+ format_abnf(e, embedded: true)
+ end.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
+ when :rept
+ # Expand repetition
+ min, max, value = expr[1..-1]
+ r = format_abnf(value, embedded: true)
+ if min == max
+ "#{min}#{r}"
+ elsif min == 0 && max == '*'
+ "#{parts[:star]}#{r}"
+ elsif min > 0 && max == '*'
+ "#{min}#{parts[:star]}#{r}"
+ else
+ "#{min}#{parts[:star]}#{max}#{r}"
+ end
+ else
+ raise "Unknown operator: #{expr.first}"
+ end
+ end
+
+ # Format a single-character string, prefering hex for non-main ASCII
+ def format_abnf_char(c)
+ if /[\x20-\x21\x23-\x7E]/.match?(c)
+ c.inspect
+ else
+ escape_abnf_hex(c)
+ end
+ end
+
+ # Format a range as ABNF numeric values
+ #
+ # Hex escapes are collected as "%x" values and other range characters as "%d" values
+ # (their code points); consecutive values are joined with "." and `a-b` spans become
+ # "first-last". A trailing '-' in the range is emitted as decimal 45.
+ #
+ # Presumes range has already been validated
+ #
+ # @param [String] string
+ # @return [String, Array] the single value collected, or an `[:alt, ...]` Array of values
+ # @raise [RangeError] if the range starts with '^'
+ def format_abnf_range(string)
+ alt, o_dash = [:alt], false
+
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
+
+ if string.end_with?('-')
+ o_dash = true
+ string = string[0..-2]
+ end
+
+ scanner = StringScanner.new(string)
+ hexes, deces = [], []
+ in_range = false
+ # Build op (alt) from different ranges/enums
+ # NOTE(review): nothing advances the scanner if neither HEX, R_CHAR nor '-' matches; relies on prior validation
+ while !scanner.eos?
+ if hex = scanner.scan(Terminals::HEX)
+ # Append any decimal values
+ alt << "%d" + deces.join(".") unless deces.empty?
+ deces = []
+
+ if in_range
+ # Add "." sequences for any previous hexes
+ alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1
+ alt << "%x#{hexes.last}-#{hex[2..-1]}"
+ in_range, hexes = false, []
+ else
+ hexes << hex[2..-1]
+ end
+ elsif dec = scanner.scan(Terminals::R_CHAR)
+ # Append any hexadecimal values
+ alt << "%x" + hexes.join(".") unless hexes.empty?
+ hexes = []
+
+ if in_range
+ # Add "." sequences for any previous decimals
+ alt << "%d" + deces[0..-2].join(".") if deces.length > 1
+ alt << "%d#{deces.last}-#{dec.codepoints.first}"
+ in_range, deces = false, []
+ else
+ deces << dec.codepoints.first.to_s
+ end
+ end
+
+ # A '-' after a value marks the next value as the end of a span
+ in_range = true if scanner.scan(/\-/)
+ end
+
+ deces << '45' if o_dash
+
+ # Append hexes and deces as "." sequences (should be only one)
+ alt << "%d" + deces.join(".") unless deces.empty?
+ alt << "%x" + hexes.join(".") unless hexes.empty?
+
+ # FIXME: HTML abbreviations?
+ if alt.length == 2
+ # Just return the range or enum
+ alt.last
+ else
+ # Return the alt, which will be further formatted
+ alt
+ end
+ end
+
+ def escape_abnf_hex(u)
+ fmt = case u.ord
+ when 0x0000..0x00ff then "%02X"
+ when 0x0100..0xffff then "%04X"
+ else "%08X"
+ end
+ char = "%x" + (fmt % u.ord)
+ if @options[:html]
+ if u.ord <= 0x20
+ char = %(#{char})
+ elsif u.ord <= 0x7F
+ char = %(#{char})
+ elsif u.ord == 0x7F
+ char = %(#{char})
+ elsif u.ord <= 0xFF
+ char = %(#{char})
+ else
+ char = %(#{char})
+ end
+ %(#{char} )
+ else
+ char
+ end
+ end
+
+ ##
+ # ISO EBNF Formatters
+ ##
+
+ # Format the expression part of a rule
+ def format_isoebnf(expr, sep: nil, embedded: false)
+ return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol)
+ if expr.is_a?(String)
+ expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/
+ expr.chars.each do |c|
+ raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless
+ ISOEBNF::TERMINAL_CHARACTER.match?(c)
+ end
+ if expr =~ /"/
+ return (@options[:html] ? %('#{expr} ') : %('#{expr}'))
+ else
+ return (@options[:html] ? %("#{expr} ") : %("#{expr}"))
+ end
+ end
+ parts = {
+ alt: (@options[:html] ? "| " : "| "),
+ diff: (@options[:html] ? "- " : "- "),
+ }
+ lparen = (@options[:html] ? "( " : "(")
+ rparen = (@options[:html] ? ") " : ")")
+
+ case expr.first
+ when :istr
+ # Looses fidelity, but, oh well ...
+ format_isoebnf(expr.last, embedded: true)
+ when :alt, :diff
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
+ res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
+ when :opt
+ r = format_isoebnf(expr[1], embedded: true)
+ "[#{r}]"
+ when :star
+ r = format_isoebnf(expr[1], embedded: true)
+ "{#{r}}"
+ when :plus
+ r = format_isoebnf(expr[1], embedded: true)
+ "#{r}, {#{r}}"
+ when :hex
+ format_isoebnf(expr[1], embedded: true)
+ when :range
+ res = format_isoebnf_range(expr.last)
+ res.is_a?(Array) ?
+ format_isoebnf(res, embedded: true) :
+ res
+ when :seq
+ this_sep = "," + (sep ? sep : " ")
+ res = expr[1..-1].map do |e|
+ format_isoebnf(e, embedded: true)
+ end.join(this_sep)
+ embedded ? (lparen + res + rparen) : res
+ when :rept
+ # Expand repetition
+ min, max, value = expr[1..-1]
+ if min == 0 && max == 1
+ format_isoebnf([:opt, value], sep: sep, embedded: embedded)
+ elsif min == 0 && max == '*'
+ format_isoebnf([:star, value], sep: sep, embedded: embedded)
+ elsif min == 1 && max == '*'
+ format_isoebnf([:plus, value], sep: sep, embedded: embedded)
+ else
+ val2 = [:seq]
+ while min > 0
+ val2 << value
+ min -= 1
+ max -= 1 unless max == '*'
+ end
+ if max == '*'
+ val2 << [:star, value]
+ else
+ opt = nil
+ while max > 0
+ opt = [:opt, opt ? [:seq, value, opt] : value]
+ max -= 1
+ end
+ val2 << opt if opt
+ end
+ format_isoebnf(val2, sep: sep, embedded: embedded)
+ end
+ else
+ raise "Unknown operator: #{expr.first}"
+ end
+ end
+
+ # Format a range
+ # Range is formatted as a aliteration of characters
+ def format_isoebnf_range(string)
+ chars = []
+ o_dash = false
+
+ raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^')
+
+ if string.end_with?('-')
+ o_dash = true
+ string = string[0..-2]
+ end
+
+ scanner = StringScanner.new(string)
+ in_range = false
+ # Build chars from different ranges/enums
+ while !scanner.eos?
+ char = if hex = scanner.scan(Terminals::HEX)
+ hex[2..-1].hex.ord.char(Encoding::UTF_8)
+ else scanner.scan(Terminals::R_CHAR)
+ end
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless
+ char && ISOEBNF::TERMINAL_CHARACTER.match?(char)
+
+ if in_range
+ # calculate characters from chars.last to this char
+ raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char
+ chars.concat (chars.last..char).to_a[1..-1]
+ in_range = false
+ else
+ chars << char
+ end
+
+ in_range = true if scanner.scan(/\-/)
+ end
+
+ chars << '-' if o_dash
+
+ # Possibly only a single character (no character?)
+ chars.length == 1 ? chars.last.inspect : chars.unshift(:alt)
+ end
+
+ ERB_DESC = %q(
+
+
+ <% for rule in @rules %>
+ >
+ <% if rule.id %>
+ <%= rule.id %> |
+ <% end %>
+ <%== rule.sym %> |
+ <%= rule.assign %> |
+ <%= rule.formatted %> |
+
+ <% end %>
+
+
).gsub(/^ /, '')
end
end
diff --git a/spec/abnf_spec.rb b/spec/abnf_spec.rb
new file mode 100644
index 0000000..91eae38
--- /dev/null
+++ b/spec/abnf_spec.rb
@@ -0,0 +1,253 @@
+# coding: utf-8
+$:.unshift "."
+require 'spec_helper'
+require 'ebnf'
+require 'sxp'
+
+describe EBNF::ABNF do
+ let(:logger) {RDF::Spec.logger}
+ after(:each) do |example|
+ puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError)
+ end
+
+ context "rule variations" do
+ {
+ "legal rule name": [
+ 'rulename = "foo"',
+ %{((terminal rulename (istr "foo")))}
+ ],
+ "binary character": [
+ "bin = %b11",
+ %{((terminal bin (hex "#x3")))}
+ ],
+ "binary string": [
+ "bin = %b1.10.11",
+ %{((rule bin (seq (hex "#x1") (hex "#x2") (hex "#x3"))))}
+ ],
+ "binary string (ascii range)": [
+ "bin = %b1100010.1101001.1101110",
+ %{((rule bin (seq "bin")))}
+ ],
+ "binary string (mixed range)": [
+ "bin = %b1100010.1.1101110",
+ %{((rule bin (seq "b" (hex "#x1") "n")))}
+ ],
+ "binary range": [
+ "bin = %b1100010-1101110",
+ %{((terminal bin (range "#x62-#x6e")))}
+ ],
+ "decimal char": [
+ "dec = %d22",
+ %{((terminal dec (hex "#x16")))}
+ ],
+ "decimal string": [
+ "dec = %d1.2.3",
+ %{((rule dec (seq (hex "#x1") (hex "#x2") (hex "#x3"))))}
+ ],
+ "decimal string (ascii range)": [
+ "dec = %d100.101.99",
+ %{((rule dec (seq "dec")))}
+ ],
+ "decimal string (mixed range)": [
+ "dec = %d100.1.99",
+ %{((rule dec (seq "d" (hex "#x1") "c")))}
+ ],
+ "decimal range": [
+ "dec = %d22-40",
+ %{((terminal dec (range "#x16-#x28")))}
+ ],
+ "hex character": [
+ "hex = %x1f",
+ %{((terminal hex (hex "#x1f")))}
+ ],
+ "hex string": [
+ "hex = %x1.a.c",
+ %{((rule hex (seq (hex "#x1") (hex "#xa") (hex "#xc"))))}
+ ],
+ "hex string (ascii range)": [
+ "hex = %x68.65.78",
+ %{((rule hex (seq "hex")))}
+ ],
+ "hex string (mixed range)": [
+ "hex = %x68.1.78",
+ %{((rule hex (seq "h" (hex "#x1") "x")))}
+ ],
+ "hex range": [
+ "hex = %x22-40",
+ %{((terminal hex (range "#x22-#x40")))}
+ ],
+ "aliteration": [
+ %(baz = foo / bar),
+ %{((rule baz (alt foo bar)))}
+ ],
+ "aliteration 2": [
+ %(buzz = foo / bar / baz),
+ %{((rule buzz (alt foo bar baz)))}
+ ],
+ "incremental alternatives": [
+ %(ruleset = alt1 / alt2\nruleset =/ alt3\nruleset =/ alt4 / alt5),
+ %{((rule ruleset (alt alt1 alt2 alt3 alt4 alt5)))}
+ ],
+ "concatenated chars and ranges": [
+ %(char-line = %x0D.0A %x20-7E %x0D.0A),
+ %{((rule char-line (seq (seq (hex "#xd") (hex "#xa")) (range "#x20-#x7e") (seq (hex "#xd") (hex "#xa")))))}
+ ],
+ "sequence group": [
+ %(sequence-group = elem (foo / bar) blat),
+ %{((rule sequence-group (seq elem (alt foo bar) blat)))}
+ ],
+ "rept *": [
+ %(rept = *A),
+ %{((rule rept (star A)))}
+ ],
+ "rept 0*": [
+ %(rept = 0*A),
+ %{((rule rept (star A)))}
+ ],
+ "rept 1*": [
+ %(rept = 1*A),
+ %{((rule rept (plus A)))}
+ ],
+ "rept 2*": [
+ %(rept = 2*A),
+ %{((rule rept (rept 2 "*" A)))}
+ ],
+ "rept *1": [
+ %(rept = *1A),
+ %{((rule rept (rept 0 1 A)))}
+ ],
+ "rept 0*2": [
+ %(rept = 0*2A),
+ %{((rule rept (rept 0 2 A)))}
+ ],
+ "rept 1*3": [
+ %(rept = 1*3A),
+ %{((rule rept (rept 1 3 A)))}
+ ],
+ "rept 3": [
+ %(rept = 3A),
+ %{((rule rept (rept 3 3 A)))}
+ ],
+ "opt": [
+ %(opt = [foo bar]),
+ %{((rule opt (opt (seq foo bar))))}
+ ],
+ "comment": [
+ %(foo = %x61 ; a),
+ %{((terminal foo (hex "#x61")))}
+ ],
+ "prose-value": [
+ %(prose = < free form >),
+ %{((rule prose (seq "< free form >")))}
+ ]
+ }.each do |title, (input, expect)|
+ it title do
+ input << "\n" unless input.end_with?("\n")
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+
+ context "Case-Sensitive String Support in ABNF" do
+ {
+ "case insensitive": [
+ %(rulename = %i"aBc"),
+ %{((terminal rulename (istr "aBc")))}
+ ],
+ "case sensitive": [
+ %(rulename = %s"aBc"),
+ %{((rule rulename (seq "aBc")))}
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ input << "\n" unless input.end_with?("\n")
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+
+ context "Core Rules" do
+ {
+ "ALPHA": [
+ "builtin = ALPHA",
+ %{((rule builtin (seq ALPHA)) (terminal ALPHA (range "#x41-#x5A#x61-#x7A")))}
+ ],
+ "BIT": [
+ "builtin = BIT",
+ %{((rule builtin (seq BIT)) (terminal BIT (alt "0" "1")))}
+ ],
+ "CR": [
+ "builtin = CR",
+ %{((rule builtin (seq CR)) (terminal CR (hex "#x0D")))}
+ ],
+ "CRLF": [
+ "builtin = CRLF",
+ %{((rule builtin (seq CRLF)) (terminal CRLF (seq (opt CR) LF)))}
+ ],
+ "CTL": [
+ "builtin = CTL",
+ %{((rule builtin (seq CTL)) (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))))}
+ ],
+ "DIGIT": [
+ "builtin = DIGIT",
+ %{((rule builtin (seq DIGIT)) (terminal DIGIT (range "#x30-#x39")))}
+ ],
+ "DQUOTE": [
+ "builtin = DQUOTE",
+ %{((rule builtin (seq DQUOTE)) (terminal DQUOTE (hex "#x22")))}
+ ],
+ "HEXDIG": [
+ "builtin = HEXDIG",
+ %{((rule builtin (seq HEXDIG)) (terminal HEXDIG (alt DIGIT (range "A-F"))))}
+ ],
+ "HTAB": [
+ "builtin = HTAB",
+ %{((rule builtin (seq HTAB)) (terminal HTAB (hex "#x09")))}
+ ],
+ "LF": [
+ "builtin = LF",
+ %{((rule builtin (seq LF)) (terminal LF (hex "#x0A")))}
+ ],
+ "LWSP": [
+ "builtin = LWSP",
+ %{((rule builtin (seq LWSP)) (terminal LWSP (star (alt WSP (seq CRLF WSP)))))}
+ ],
+ "WSP": [
+ "builtin = WSP",
+ %{((rule builtin (seq WSP)) (terminal WSP (alt SP HTAB)))}
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ input << "\n" unless input.end_with?("\n")
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+
+ context "illegal syntax" do
+ {
+ "illegal rule name": "rule.name = CRLF\n",
+ "no line ending": "rule.name = CRLF",
+ "illegal binary": "bin = %b2\n",
+ "illegal binary range": "bin = %b10-20\n",
+ "illegal decimal": "dec = %d2f\n",
+ "illegal decimal range": "dec = %d22-4060-80\n",
+ "illegal hex": "hex = %x2g\n",
+ "illegal hex range": "hex = %x22-4060-80\n",
+ }.each do |title, input|
+ it title do
+ expect {parse(input)}.to raise_error(SyntaxError)
+ end
+ end
+ end
+
+ it "parses ABNF grammar" do
+ gram = parse(File.open(File.expand_path("../../etc/abnf.abnf", __FILE__)))
+ expect(gram).to be_valid
+ end
+
+ def parse(input, **options)
+ @debug = []
+ EBNF.parse(input, debug: @debug, format: :abnf, **options)
+ end
+end
diff --git a/spec/base_spec.rb b/spec/base_spec.rb
index 50d14d9..7510b48 100644
--- a/spec/base_spec.rb
+++ b/spec/base_spec.rb
@@ -3,35 +3,37 @@
require 'spec_helper'
require 'ebnf'
require 'sxp'
+require 'rdf/turtle'
describe EBNF::Base do
- describe ".new" do
+ subject {PARSED_EBNF_GRAMMAR.dup}
+
+ describe "#initialize" do
{
%{[2] Prolog ::= BaseDecl? PrefixDecl*} =>
%{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))},
%{
@terminals
- [3] terminal ::= [A-Z_]+
- } => %{((terminal terminal "3" (plus (range "A-Z_"))))},
+ [3] terminal ::= [A-Z]+
+ } => %{((terminals _terminals (seq))
+ (terminal terminal "3" (plus (range "A-Z"))))},
%{
[9] primary ::= HEX
| RANGE
- | ENUM
| O_RANGE
- | O_ENUM
| STRING1
| STRING2
| '(' expression ')'
- } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))},
+ } => %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))},
%{#[1] rule ::= 'FOO'} => %{()},
%{//[1] rule ::= 'FOO'} => %{()},
%{[18] SolutionModifier ::= _SolutionModifier_1 _SolutionModifier_2} =>
%{((rule SolutionModifier "18" (seq _SolutionModifier_1 _SolutionModifier_2)))},
%{[18.1] _SolutionModifier_1 ::= _empty | GroupClause} =>
%{((rule _SolutionModifier_1 "18.1" (alt _empty GroupClause)))},
- %q{[18] STRING1 ::= '"' (CHAR | [\t\'\[\]\(\)\-])* '"'} =>
- %q{((terminal STRING1 "18" (seq "\"" (star (alt CHAR (range "\t'[]()-"))) "\"")))},
+ %q{[18] STRING1 ::= '"' (CHAR - '"')* '"'} =>
+ %q{((terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")))},
%q{[161s] WS ::= #x20 | #x9 | #xD | #xA} =>
%q{((terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))))},
%q{[1] shexDoc ::= directive* # leading CODE} =>
@@ -44,7 +46,7 @@
%q{((rule shexDoc "1" (star directive)))},
%q{[1] shexDoc ::= /* leading CODE */ directive*} =>
%q{((rule shexDoc "1" (star directive)))},
- %q{[1] shexDoc (* leading CODE *) ::= directive*} =>
+ %q{[1] shexDoc ::= (* leading CODE *) directive*} =>
%q{((rule shexDoc "1" (star directive)))},
%q{[156s] STRING_LITERAL1 ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */} =>
%q{((terminal STRING_LITERAL1 "156s"
@@ -60,12 +62,94 @@
expect(ast.to_sxp).to produce(expected, @debug)
end
end
+
+ it "rejects unknown format" do
+ expect {parse("foo", format: :unknown)}.to raise_error "unknown input format :unknown"
+ end
+ end
+
+ describe "#renumber!" do
+ it "creates identifiers for grammars without identifiers" do
+ gram = EBNF.parse("a ::= b d ::= e")
+ gram.renumber!
+ expect(gram.ast.map(&:id)).to eq %w{1 2}
+ end
+
+ it "renumbers grammars with identifiers" do
+ gram = EBNF.parse("[10] a ::= b [20] d ::= e")
+ gram.renumber!
+ expect(gram.ast.map(&:id)).to eq %w{1 2}
+ end
+ end
+
+ describe "#validate!" do
+ let(:simple) {EBNF.parse("a ::= b")}
+ it "notes invalid grammar" do
+ expect do
+ expect {simple.validate!}.to raise_error SyntaxError, "In rule a: No rule found for b"
+ end.to write(:something).to(:error)
+ end
+
+ it "validates EBNF" do
+ expect {subject.validate!}.not_to raise_error
+ end
+ end
+
+ describe "#valid?" do
+ let(:simple) {EBNF.parse("a ::= b")}
+ it "notes invalid grammar" do
+ expect do
+ expect(simple.valid?).to be_falsey
+ end.to write(:something).to(:error)
+ end
+
+ it "validates EBNF" do
+ expect(subject).to be_valid
+ end
+ end
+
+ describe "#each" do
+ it "yields each rule" do
+ rules = subject.ast.select {|r| r.rule?}
+ expect {|b| subject.each(:rule, &b)}.to yield_control.exactly(rules.length).times
+ end
+ it "yields each terminal" do
+ terminals = subject.ast.select {|r| r.terminal?}
+ expect {|b| subject.each(:terminal, &b)}.to yield_control.exactly(terminals.length).times
+ end
+ end
+
+ describe "#to_sxp" do
+ specify {expect(subject.to_sxp).to include("(rule ebnf")}
+ end
+
+ describe "#to_s" do
+ specify {expect(subject.to_s).to include("[1] ebnf")}
+ end
+
+ describe "#to_html" do
+ specify {expect(subject.to_s).to include("[1] ebnf")}
+ end
+
+ describe "#to_ruby" do
+ specify {expect {subject.to_ruby}.to write(:something).to(:output)}
+ end
+
+ describe "#to_ttl" do
+ let(:reader) {RDF::Turtle::Reader.new(subject.to_ttl, base_uri: 'http://example.org/')}
+ specify {expect(reader).to be_valid}
end
describe "#dup" do
specify {expect(parse(%{[2] Prolog ::= BaseDecl? PrefixDecl*}).dup).to be_a(EBNF::Base)}
end
+ describe "#find_rule" do
+ it "finds ebnf" do
+ expect(subject.find_rule(:ebnf).sym).to eql :ebnf
+ end
+ end
+
def parse(value, **options)
@debug = []
options = {debug: @debug}.merge(options)
diff --git a/spec/bnf_spec.rb b/spec/bnf_spec.rb
index 4fe65ab..137cfbb 100644
--- a/spec/bnf_spec.rb
+++ b/spec/bnf_spec.rb
@@ -16,16 +16,14 @@
%{
[9] primary ::= HEX
| RANGE
- | ENUM
| O_RANGE
- | O_ENUM
| STRING1
| STRING2
| '(' expression ')'
} =>
%{((rule _empty "0" (seq))
- (rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 _primary_1 ))
+ (rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1 ))
(rule _primary_1 "9.1" (seq "(" expression ")")))},
%{
[1] R1 ::= 1 2
diff --git a/spec/ebnf_spec.rb b/spec/ebnf_spec.rb
index 4fdfd77..1664976 100644
--- a/spec/ebnf_spec.rb
+++ b/spec/ebnf_spec.rb
@@ -11,28 +11,25 @@
%{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))},
%{
@terminals
- [3] terminal ::= [A-Z_]+
- } => %{((terminal terminal "3" (plus (range "A-Z_"))))},
+ [3] terminal ::= [A-Z]+
+ } => %{((terminals _terminals (seq))
+ (terminal terminal "3" (plus (range "A-Z"))))},
%{
[9] primary ::= HEX
| RANGE
- | ENUM
| O_RANGE
- | O_ENUM
| STRING1
| STRING2
| '(' expression ')'
- } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))},
+ } => %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))},
%{
primary ::= HEX
| RANGE
- | ENUM
| O_RANGE
- | O_ENUM
| STRING1
| STRING2
| '(' expression ')'
- } => %{((rule primary (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))},
+ } => %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))},
}.each do |input, expected|
context input do
subject {EBNF.parse(input)}
@@ -59,7 +56,7 @@
end
context "README" do
- let(:ebnf) {EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))}
+ let(:ebnf) {PARSED_EBNF_GRAMMAR.dup}
subject {ebnf}
it "creates ast" do
@@ -82,18 +79,24 @@
expect(subject.to_s).not_to be_empty
end
- context "BNF" do
+ context "LL1" do
before {subject.make_bnf}
- context "LL1" do
- before do
- subject.first_follow(:ebnf)
- subject.build_tables
- end
+ before do
+ subject.first_follow(:ebnf)
+ subject.build_tables
+ end
+
+ it "#to_ruby" do
+ expect {subject.to_ruby}.to write(:something).to(:output)
+ end
+ end
+
+ context "PEG" do
+ before {subject.make_peg}
- it "#to_ruby" do
- expect {subject.to_ruby}.to write(:something).to(:output)
- end
+ it "#to_ruby" do
+ expect {subject.to_ruby}.to write(:something).to(:output)
end
end
end
diff --git a/spec/examples/ebnf-parser-spec.rb b/spec/examples/ebnf-parser-spec.rb
deleted file mode 100644
index 9cd92f5..0000000
--- a/spec/examples/ebnf-parser-spec.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-# coding: utf-8
-require 'spec_helper'
-$:.unshift(File.expand_path("../../../examples/ebnf-parser", __FILE__))
-require 'parser'
-
-describe EBNFParser do
- describe ".new" do
- {
- %{[2] Prolog ::= BaseDecl? PrefixDecl*} =>
- %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))},
- %{
- @terminals
- [3] terminal ::= [A-Z_]+
- } => %{((terminal terminal "3" (plus (range "A-Z_"))))},
- %{
- [9] primary ::= HEX
- | RANGE
- | ENUM
- | O_RANGE
- | O_ENUM
- | STRING1
- | STRING2
- | '(' expression ')'
-
- } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))},
- %{#[1] rule ::= 'FOO'} => %{()},
- %{//[1] rule ::= 'FOO'} => %{()},
- %{[18] SolutionModifier ::= _SolutionModifier_1 _SolutionModifier_2} =>
- %{((rule SolutionModifier "18" (seq _SolutionModifier_1 _SolutionModifier_2)))},
- %{[18.1] _SolutionModifier_1 ::= _empty | GroupClause} =>
- %{((rule _SolutionModifier_1 "18.1" (alt _empty GroupClause)))},
- %q{[18] STRING1 ::= '"' ((CHAR - '"') | '\\t')* '"'} =>
- %q{((terminal STRING1 "18" (seq "\"" (star (alt (diff CHAR "\"") "\t")) "\"")))}
- }.each do |input, expected|
- it "parses #{input.inspect}" do
- expect(parse(input, validate: true).ast.to_sxp).to produce(expected, @debug)
- end
- end
- end
-
- def parse(value, **options)
- @debug = []
- options = {debug: @debug}.merge(options)
- EBNFParser.new(value, **options)
- end
-end
\ No newline at end of file
diff --git a/spec/isoebnf_spec.rb b/spec/isoebnf_spec.rb
new file mode 100644
index 0000000..ddb7fd1
--- /dev/null
+++ b/spec/isoebnf_spec.rb
@@ -0,0 +1,237 @@
+# coding: utf-8
+$:.unshift "."
+require 'spec_helper'
+require 'ebnf'
+require 'sxp'
+
+describe EBNF::ISOEBNF do
+ let(:logger) {RDF::Spec.logger}
+ after(:each) do |example|
+ puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError)
+ end
+
+ context "rule variations" do
+ {
+ "legal meta_identifier": [
+ 'rulename = "foo" ;',
+ %{((rule rulename (seq "foo")))}
+ ],
+ "digits": [
+ %{
+ digit_excluding_zero = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ digit = "0" | digit_excluding_zero ;
+ },
+ %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule digit (alt "0" digit_excluding_zero)))}
+ ],
+ "sequence of numbers": [
+ %{
+ twelve = "1", "2" ;
+ two_hundred_one = "2", "0", "1" ;
+ three_hundred_twelve = "3", twelve ;
+ twelve_thousand_two_hundred_one = twelve, two_hundred_one ;
+ },
+ %{((rule twelve (seq "1" "2"))
+ (rule two_hundred_one (seq "2" "0" "1"))
+ (rule three_hundred_twelve (seq "3" twelve))
+ (rule twelve_thousand_two_hundred_one (seq twelve two_hundred_one)))}
+ ],
+ "natural number": [
+ %{natural_number = digit_excluding_zero, { digit } ;},
+ %{((rule natural_number (seq digit_excluding_zero (star digit))))}
+ ],
+ "integer": [
+ %{integer = "0" | [ "-" ], natural_number ;},
+ %{((rule integer (alt "0" (seq (opt "-") natural_number))))}
+ ],
+ "simple grammar": [
+ %q{
+ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
+ | "c" | "d" | "e" | "f" | "g" | "h" | "i"
+ | "j" | "k" | "l" | "m" | "n" | "o" | "p"
+ | "q" | "r" | "s" | "t" | "u" | "v" | "w"
+ | "x" | "y" | "z" ;
+ digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
+ | "'" | '"' | "=" | "|" | "." | "," | ";" ;
+ character = letter | digit | symbol | "_" ;
+
+ identifier = letter , { letter | digit | "_" } ;
+ terminal = "'" , character , { character } , "'"
+ | '"' , character , { character } , '"' ;
+
+ lhs = identifier ;
+ rhs = identifier
+ | terminal
+ | "[" , rhs , "]"
+ | "{" , rhs , "}"
+ | "(" , rhs , ")"
+ | rhs , "|" , rhs
+ | rhs , "," , rhs ;
+
+ rule = lhs , "=" , rhs , ";" ;
+ grammar = { rule } ;
+ },
+ %q{((rule letter
+ (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"
+ "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k"
+ "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" ))
+ (rule digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule symbol (alt "[" "]" "{" "}" "(" ")" "<" ">" "'" "\"" "=" "|" "." "," ";"))
+ (rule character (alt letter digit symbol "_"))
+ (rule identifier (seq letter (star (alt letter digit "_"))))
+ (rule terminal
+ (alt (seq "'" character (star character) "'") (seq "\"" character (star character) "\"")))
+ (rule lhs (seq identifier))
+ (rule rhs
+ (alt identifier terminal
+ (seq "[" rhs "]")
+ (seq "{" rhs "}")
+ (seq "(" rhs ")")
+ (seq rhs "|" rhs)
+ (seq rhs "," rhs)) )
+ (rule rule (seq lhs "=" rhs ";"))
+ (rule grammar (star rule)))}
+ ],
+ "pascal": [
+ %q{
+ (* a simple program syntax in EBNF − Wikipedia *)
+ program = 'PROGRAM', white_space, identifier, white_space,
+ 'BEGIN', white_space,
+ { assignment, ";", white_space },
+ 'END.' ;
+ identifier = alphabetic_character, { alphabetic_character | digit } ;
+ number = [ "-" ], digit, { digit } ;
+ string = '"' , { all_characters - '"' }, '"' ;
+ assignment = identifier , ":=" , ( number | identifier | string ) ;
+ alphabetic_character = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" ;
+ digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+ white_space = ? white_space characters ? ;
+ all_characters = ? all visible characters ? ;
+ },
+ %q{((rule program
+ (seq "PROGRAM" white_space identifier white_space "BEGIN" white_space
+ (star (seq assignment ";" white_space)) "END." ))
+ (rule identifier (seq alphabetic_character (star (alt alphabetic_character digit))))
+ (rule number (seq (opt "-") digit (star digit)))
+ (rule string (seq "\"" (star (diff all_characters "\"")) "\""))
+ (rule assignment (seq identifier ":=" (seq (alt number identifier string))))
+ (rule alphabetic_character
+ (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R"
+ "S" "T" "U" "V" "W" "X" "Y" "Z" ))
+ (rule digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule white_space (seq "? white_space characters ?"))
+ (rule all_characters (seq "? all visible characters ?")))}
+ ],
+ "AA": [
+ %{AA = "A";},
+ %{((terminal AA (seq "A")))}
+ ],
+ "BB": [
+ %{BB = 3 * AA, "B";},
+ %{ ((terminal BB (seq (rept 3 3 AA) "B")))}
+ ],
+ "CC": [
+ %{CC = 3 * [AA], "C";},
+ %{((terminal CC (seq (rept 3 3 (opt AA)) "C")))}
+ ],
+ "DD": [
+ %{DD = {AA}, "D";},
+ %{((terminal DD (seq (star AA) "D")))}
+ ],
+ "EE": [
+ %{EE = AA, {AA}, "E";},
+ %{((terminal EE (seq AA (star AA) "E")))}
+ ],
+ "FF": [
+ %{FF = 3 * AA, 3 * [AA], "F";},
+ %{((terminal FF (seq (rept 3 3 AA) (rept 3 3 (opt AA)) "F")))}
+ ],
+ "GG": [
+ %{GG = {3 * AA}, "G";},
+ %{((terminal GG (seq (star (rept 3 3 AA)) "G")))}
+ ],
+ "space": [
+ %{space = ? US-ASCII character 32 ?;},
+ %{((rule space (seq "? US-ASCII character 32 ?")))} # XXX this expected output is probably not the desired result
+ ],
+ "something": [
+ %{something = foo, ( bar );},
+ %{((rule something (seq foo (seq bar))))}
+ ],
+ "diff": [
+ %{first_terminal_character = terminal_character - "'" ;},
+ %{((rule first_terminal_character (diff terminal_character "'")))},
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ input << "\n" unless input.end_with?("\n")
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+
+ context "alternate terminal characters" do
+ {
+ "digits /": [
+ %{
+ digit_excluding_zero = "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9" ;
+ digit = "0" / digit_excluding_zero ;
+ },
+ %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule digit (alt "0" digit_excluding_zero)))}
+ ],
+ "digits !": [
+ %{
+ digit_excluding_zero = "1" ! "2" ! "3" ! "4" ! "5" ! "6" ! "7" ! "8" ! "9" ;
+ digit = "0" ! digit_excluding_zero ;
+ },
+ %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9"))
+ (rule digit (alt "0" digit_excluding_zero)))}
+ ],
+ #"integer (/ /)": [
+ # %{integer = "0" | (/ "-" /), natural_number ;},
+ # %{((rule integer (alt "0" (seq (opt "-") natural_number))))}
+ #],
+ "natural number (: :)": [
+ %{natural_number = digit_excluding_zero, (: digit :) ;},
+ %{((rule natural_number (seq digit_excluding_zero (star digit))))}
+ ],
+ "legal meta_identifier .": [
+ 'rulename = "foo" .',
+ %{((rule rulename (seq "foo")))}
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ input << "\n" unless input.end_with?("\n")
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+
+ context "illegal syntax" do
+ {
+ "something": "something = foo ( bar );"
+ }.each do |title, input|
+ it title do
+ expect {parse(input)}.to raise_error(SyntaxError)
+ end
+ end
+ end
+
+ it "parses ISO EBNF grammar" do
+ gram = parse(File.open(File.expand_path("../../etc/iso-ebnf.isoebnf", __FILE__)))
+ expect(gram).to be_valid
+ end
+
+ def parse(input, **options)
+ @debug = []
+ EBNF.parse(input, debug: @debug, format: :isoebnf, **options)
+ end
+end
diff --git a/spec/ll1/data/meta.rb b/spec/ll1/data/meta.rb
index e143b86..96a33a9 100644
--- a/spec/ll1/data/meta.rb
+++ b/spec/ll1/data/meta.rb
@@ -1,14 +1,11 @@
-# This file is automatically generated by /Users/gregg/Projects/ebnf/bin/ebnf
-# BRANCH derived from ../../etc/ebnf.ebnf
+# This file is automatically generated by ebnf version 2.0.0
+# Derived from etc/ebnf.ebnf
module EBNFParserMeta
START = :ebnf
-
BRANCH = {
:alt => {
"(" => [:seq, :_alt_1],
- :ENUM => [:seq, :_alt_1],
:HEX => [:seq, :_alt_1],
- :O_ENUM => [:seq, :_alt_1],
:O_RANGE => [:seq, :_alt_1],
:RANGE => [:seq, :_alt_1],
:STRING1 => [:seq, :_alt_1],
@@ -34,9 +31,7 @@ module EBNFParserMeta
},
:diff => {
"(" => [:postfix, :_diff_1],
- :ENUM => [:postfix, :_diff_1],
:HEX => [:postfix, :_diff_1],
- :O_ENUM => [:postfix, :_diff_1],
:O_RANGE => [:postfix, :_diff_1],
:RANGE => [:postfix, :_diff_1],
:STRING1 => [:postfix, :_diff_1],
@@ -49,10 +44,8 @@ module EBNFParserMeta
"-" => [:_diff_2],
"@pass" => [],
"@terminals" => [],
- :ENUM => [],
:HEX => [],
:LHS => [],
- :O_ENUM => [],
:O_RANGE => [],
:RANGE => [],
:STRING1 => [],
@@ -80,9 +73,7 @@ module EBNFParserMeta
},
:expression => {
"(" => [:alt],
- :ENUM => [:alt],
:HEX => [:alt],
- :O_ENUM => [:alt],
:O_RANGE => [:alt],
:RANGE => [:alt],
:STRING1 => [:alt],
@@ -94,9 +85,7 @@ module EBNFParserMeta
},
:postfix => {
"(" => [:primary, :_postfix_1],
- :ENUM => [:primary, :_postfix_1],
:HEX => [:primary, :_postfix_1],
- :O_ENUM => [:primary, :_postfix_1],
:O_RANGE => [:primary, :_postfix_1],
:RANGE => [:primary, :_postfix_1],
:STRING1 => [:primary, :_postfix_1],
@@ -109,10 +98,8 @@ module EBNFParserMeta
"-" => [],
"@pass" => [],
"@terminals" => [],
- :ENUM => [],
:HEX => [],
:LHS => [],
- :O_ENUM => [],
:O_RANGE => [],
:POSTFIX => [:POSTFIX],
:RANGE => [],
@@ -123,9 +110,7 @@ module EBNFParserMeta
},
:primary => {
"(" => [:_primary_1],
- :ENUM => [:ENUM],
:HEX => [:HEX],
- :O_ENUM => [:O_ENUM],
:O_RANGE => [:O_RANGE],
:RANGE => [:RANGE],
:STRING1 => [:STRING1],
@@ -140,9 +125,7 @@ module EBNFParserMeta
},
:seq => {
"(" => [:diff, :_seq_1],
- :ENUM => [:diff, :_seq_1],
:HEX => [:diff, :_seq_1],
- :O_ENUM => [:diff, :_seq_1],
:O_RANGE => [:diff, :_seq_1],
:RANGE => [:diff, :_seq_1],
:STRING1 => [:diff, :_seq_1],
@@ -154,10 +137,8 @@ module EBNFParserMeta
")" => [],
"@pass" => [],
"@terminals" => [],
- :ENUM => [:_seq_2],
:HEX => [:_seq_2],
:LHS => [],
- :O_ENUM => [:_seq_2],
:O_RANGE => [:_seq_2],
:RANGE => [:_seq_2],
:STRING1 => [:_seq_2],
@@ -167,9 +148,7 @@ module EBNFParserMeta
},
:_seq_2 => {
"(" => [:diff, :_seq_1],
- :ENUM => [:diff, :_seq_1],
:HEX => [:diff, :_seq_1],
- :O_ENUM => [:diff, :_seq_1],
:O_RANGE => [:diff, :_seq_1],
:RANGE => [:diff, :_seq_1],
:STRING1 => [:diff, :_seq_1],
@@ -183,10 +162,8 @@ module EBNFParserMeta
"-",
"@pass",
"@terminals",
- :ENUM,
:HEX,
:LHS,
- :O_ENUM,
:O_RANGE,
:POSTFIX,
:RANGE,
@@ -200,9 +177,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -223,9 +198,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -236,9 +209,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -254,9 +225,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -284,9 +253,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -296,9 +263,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -306,9 +271,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -322,9 +285,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -334,9 +295,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -348,9 +307,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -358,9 +315,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -369,9 +324,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -379,9 +332,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -390,9 +341,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -401,9 +350,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"("],
@@ -465,9 +412,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -481,9 +426,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -497,9 +440,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -513,9 +454,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -529,9 +468,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -573,9 +510,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -590,9 +525,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -607,9 +540,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -625,9 +556,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -643,9 +572,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -661,9 +588,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -679,9 +604,7 @@ module EBNFParserMeta
:HEX,
:SYMBOL,
:RANGE,
- :ENUM,
:O_RANGE,
- :O_ENUM,
:STRING1,
:STRING2,
"@terminals",
@@ -734,6 +657,17 @@ module EBNFParserMeta
:LHS,
"@pass"],
}.freeze
+ CLEANUP = {
+ :_alt_1 => :star,
+ :_alt_3 => :merge,
+ :_diff_1 => :opt,
+ :ebnf => :star,
+ :_ebnf_2 => :merge,
+ :_postfix_1 => :opt,
+ :seq => :plus,
+ :_seq_1 => :star,
+ :_seq_2 => :merge,
+ }.freeze
PASS = [
:PASS
].freeze
diff --git a/spec/ll1/data/parser.rb b/spec/ll1/data/parser.rb
index 6317492..e9a0079 100644
--- a/spec/ll1/data/parser.rb
+++ b/spec/ll1/data/parser.rb
@@ -29,14 +29,6 @@ class EBNFParser
input[:terminal] = token.value
end
- terminal(:ENUM, ENUM, unescape: true) do |prod, token, input|
- input[:terminal] = [:range, token.value[1..-2]]
- end
-
- terminal(:O_ENUM, O_ENUM, unescape: true) do |prod, token, input|
- input[:terminal] = [:range, token.value[1..-2]]
- end
-
terminal(:RANGE, RANGE, unescape: true) do |prod, token, input|
input[:terminal] = [:range, token.value[1..-2]]
end
@@ -61,10 +53,15 @@ class EBNFParser
input[:terminal] = token.value
end
+ production(:ebnf) do |input, current, callback|
+ # Cause method_missing to invoke something in our context
+ to_sxp
+ end
+
production(:declaration) do |input, current, callback|
# current contains a declaration.
# Invoke callback
- callback.call(:terminal) if current[:terminal] == '@terminals'
+ callback.call(:terminals) if current[:terminal] == '@terminals'
end
production(:rule) do |input, current, callback|
@@ -145,6 +142,8 @@ class EBNFParser
end
production(:_diff_1) do |input, current, callback|
+ # Gratuitous call to exercise method
+ add_prod_data(:_diff_1, "foo")
input[:diff] ||= [:diff]
# Add optimized value of `postfix`, if any
@@ -152,6 +151,8 @@ class EBNFParser
end
production(:postfix) do |input, current, callback|
+ # Gratuitous call to exercise method
+ add_prod_datum(:postfix, "foo")
# Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
input[:postfix] = case current[:postfix]
when "*" then [:star, current[:primary]]
@@ -162,6 +163,8 @@ class EBNFParser
end
production(:primary) do |input, current, callback|
+ # Gratuitous call to exercise method
+ add_prod_datum(:primary, ["foo"])
input[:primary] = if current[:expression]
v = current[:expression][1..-1]
v = v.first if v.length == 1
@@ -199,9 +202,9 @@ def initialize(input, **options, &block)
**options
) do |context, *data|
rule = case context
- when :terminal
+ when :terminals
parsing_terminals = true
- next
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
when :pass
rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass)
when :rule
@@ -218,6 +221,6 @@ def initialize(input, **options, &block)
def to_sxp
require 'sxp' unless defined?(SXP)
# Output rules as a formatted S-Expression
- SXP::Generator.string(@ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
+ SXP::Generator.string(@ast.map(&:for_sxp))
end
end
diff --git a/spec/ll1/lexer_spec.rb b/spec/ll1/lexer_spec.rb
index f0aa643..76354f9 100644
--- a/spec/ll1/lexer_spec.rb
+++ b/spec/ll1/lexer_spec.rb
@@ -196,46 +196,6 @@
end
end
- describe "#lineno" do
- it "for white space" do
- inputs = {
- "" => 1,
- "\n" => 2,
- "\n\n" => 3,
- "\r\n" => 2,
- }
- inputs.each do |input, lineno|
- lexer = tokenize(input)
- lexer.to_a # consumes the input
- expect(lexer.lineno).to eq lineno
- end
- end
-
- context "STRING_LITERAL_LONG_QUOTE" do
- it "tracks line numbers" do
- input = %(
- :Test a rdfs:Class ;
- rdfs:subClassOf mf:ManifestEntry;
- rdfs:label "Superclass of all CSVW tests" ;
- rdfs:comment """
- All CSVW tests have an input file referenced using `mf:action`. Positive
- and Negative Evaluation Tests also have a result file referenced using
- `mf:result` . Other tests may take different inputs and options as defined
- for each test class.
- """ ;
- :b :c .
- )
- expect(tokenize(input).to_a.map(&:lineno)).to include(
- 2, 2, 2, 2,
- 3, 3, 3,
- 4, 4, 4,
- 5, 5, 10,
- 11, 11, 11
- )
- end
- end
- end
-
it "matches input longer than low water mark when buffer is low" do
input = StringIO.new %("""123456789 123456789 """ """123456789 123456789 """)
lexer = EBNF::LL1::Lexer.new(input, terminals,
@@ -246,68 +206,159 @@
expect(lexer.shift.type).to eq :STRING_LITERAL_LONG_QUOTE
expect(lexer.shift.type).to eq :STRING_LITERAL_LONG_QUOTE
end
+ end
- context "yielding tokens" do
- it "annotates tokens with the current line number" do
- results = %w(1 2 3 4)
- tokenize("1\n2\n3\n4").each_token do |token|
- expect(token.type).to eq :INTEGER
- expect(token.value).to eq results.shift
- end
+ describe "#valid?" do
+ it "validates legal input" do
+ expect(tokenize(%q(:a "b" ))).to be_valid
+ end
+
+ it "invalidates illegal input" do
+ expect(tokenize(%q(:a 'open))).not_to be_valid
+ end
+ end
+
+ describe "#lineno" do
+ it "for white space" do
+ inputs = {
+ "" => 1,
+ "\n" => 2,
+ "\n\n" => 3,
+ "\r\n" => 2,
+ }
+ inputs.each do |input, lineno|
+ lexer = tokenize(input)
+ lexer.to_a # consumes the input
+ expect(lexer.lineno).to eq lineno
end
end
- describe "#first/#shift/#recover" do
- subject {tokenize("1\n2\n3\n4")}
- it "returns tokens in first/shift sequence" do
- %w{1 2 3 4}.each do |v|
- expect(subject.first.value).to eq v
- subject.shift
- end
- expect(subject.first).to be_nil
+ context "STRING_LITERAL_LONG_QUOTE" do
+ it "tracks line numbers" do
+ input = %(
+ :Test a rdfs:Class ;
+ rdfs:subClassOf mf:ManifestEntry;
+ rdfs:label "Superclass of all CSVW tests" ;
+ rdfs:comment """
+ All CSVW tests have an input file referenced using `mf:action`. Positive
+ and Negative Evaluation Tests also have a result file referenced using
+ `mf:result` . Other tests may take different inputs and options as defined
+ for each test class.
+ """ ;
+ :b :c .
+ )
+ expect(tokenize(input).to_a.map(&:lineno)).to include(
+ 2, 2, 2, 2,
+ 3, 3, 3,
+ 4, 4, 4,
+ 5, 5, 10,
+ 11, 11, 11
+ )
end
+ end
+ end
- context "with unrecognized token" do
- subject {tokenize("< space > 'foo' 1")}
+ describe "#first/#shift/#recover" do
+ subject {tokenize("1\n2\n3\n4")}
+ it "returns tokens in first/shift sequence" do
+ %w{1 2 3 4}.each do |v|
+ expect(subject.first.value).to eq v
+ subject.shift
+ end
+ expect(subject.first).to be_nil
+ end
- it "raises error with #first" do
- expect {subject.first}.to raise_error(EBNF::LL1::Lexer::Error, /Invalid token/)
- end
-
- it "recovers to next token" do
- subject.recover
- expect(subject.first.value).to eq "'foo'"
- end
+ context "with unrecognized token" do
+ subject {tokenize("< space > 'foo' 1")}
+
+ it "raises error with #first" do
+ expect {subject.first}.to raise_error(EBNF::LL1::Lexer::Error, /Invalid token/)
end
+
+ it "recovers to next token" do
+ subject.recover
+ expect(subject.first.value).to eq "'foo'"
+ end
+ end
- describe "#first" do
- it "returns a token when passed as an argument" do
- expect(subject.first(:INTEGER)).to be_a(EBNF::LL1::Lexer::Token)
- end
+ describe "#first" do
+ it "returns a token when passed as an argument" do
+ expect(subject.first(:INTEGER)).to be_a(EBNF::LL1::Lexer::Token)
+ end
- it "does not return a token unless passed as an argument" do
- expect {subject.first(:Double)}.to raise_error(EBNF::LL1::Lexer::Error, 'Invalid token "1"')
- end
+ it "does not return a token unless passed as an argument" do
+ expect {subject.first(:Double)}.to raise_error(EBNF::LL1::Lexer::Error, 'Invalid token "1"')
end
end
+ end
- describe EBNF::LL1::Lexer::Terminal do
- {
- "returns itself with no map entry": {
- input: "FOO",
- map: {},
- expect: "FOO"
- },
- "returns map value if specified": {
- input: "FOO",
- map: {"foo" => 'bar'},
- expect: "bar"
- },
- }.each do |name, params|
- it name do
- term = described_class.new(:nil, params[:regexp], map: params[:map])
- expect(term.canonicalize(params[:input])).to eq params[:expect]
- end
+ describe EBNF::LL1::Lexer::Token do
+ subject {described_class.new(:type, 'value', lineno: 1)}
+
+ describe "#type" do
+ its(:type) {is_expected.to eq :type}
+ end
+
+ describe "#value" do
+ its(:value) {is_expected.to eq 'value'}
+ end
+
+ describe "#lineno" do
+ its(:lineno) {is_expected.to eq 1}
+ end
+
+ describe "#[]" do
+ it "returns type at 0 index" do
+ expect(subject[0]).to eq :type
+ end
+
+ it "returns value at 1 index" do
+ expect(subject[1]).to eq 'value'
+ end
+
+ it "returns nil for other indexes" do
+ expect(subject[2]).to be_nil
+ end
+ end
+
+ describe "#===" do
+ specify {expect(subject).to be === :type}
+ specify {expect(subject).to be === 'value'}
+ end
+
+ describe "#to_hash" do
+ specify {expect(subject.to_hash).to eql({type: :type, value: 'value'})}
+ end
+
+ describe "#to_s" do
+ specify {expect(subject.to_s).to eq ":type"}
+ end
+
+ describe "#representation" do
+ specify {expect(subject.representation).to eq :type}
+ end
+
+ describe "#to_a" do
+ specify {expect(subject.to_a).to eq [:type, 'value']}
+ end
+ end
+
+ describe EBNF::LL1::Lexer::Terminal do
+ {
+ "returns itself with no map entry": {
+ input: "FOO",
+ map: {},
+ expect: "FOO"
+ },
+ "returns map value if specified": {
+ input: "FOO",
+ map: {"foo" => 'bar'},
+ expect: "bar"
+ },
+ }.each do |name, params|
+ it name do
+ term = described_class.new(:nil, params[:regexp], map: params[:map])
+ expect(term.canonicalize(params[:input])).to eq params[:expect]
end
end
end
diff --git a/spec/ll1/parser_spec.rb b/spec/ll1/parser_spec.rb
index b1cc445..3d85c0b 100644
--- a/spec/ll1/parser_spec.rb
+++ b/spec/ll1/parser_spec.rb
@@ -58,6 +58,12 @@ class LL1ParserTest
subject.parse("foo", nil, branch: {a: {b: ["c"]}})
}.to raise_error(EBNF::LL1::Parser::Error, "Starting production not defined")
end
+
+ it "raises error on invalid input" do
+ expect {
+ subject.parse("bar", :foo, branch: {foo: {bar: ["baz"]}})
+ }.to raise_error(EBNF::LL1::Parser::Error, /Invalid token "bar"/)
+ end
end
require_relative "data/parser"
diff --git a/spec/ll1_spec.rb b/spec/ll1_spec.rb
index 35356bd..7dbac19 100644
--- a/spec/ll1_spec.rb
+++ b/spec/ll1_spec.rb
@@ -86,12 +86,12 @@
[18] IRIREF ::= '<' ("range" | UCHAR)* '>'
[29t] SPARQL_BASE ::= [Bb][Aa][Ss][Ee]
},
- %{
- ((rule _empty "0" (first _eps) (seq))
- (terminal IRIREF "18" (seq "<" (star (alt "range" UCHAR)) ">"))
- (rule sparqlBase "29s" (first SPARQL_BASE) (seq SPARQL_BASE IRIREF))
- (rule _sparqlBase_1 "29s.1" (first IRIREF) (seq IRIREF))
- (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee"))))
+ %{(
+ (rule _empty "0" (first _eps) (seq))
+ (terminal IRIREF "18" (seq "<" (star (alt "range" UCHAR)) ">"))
+ (rule sparqlBase "29s" (first SPARQL_BASE) (seq SPARQL_BASE IRIREF))
+ (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))
+ (rule _sparqlBase_1 "29s.1" (first IRIREF) (seq IRIREF)))
}, nil
],
"declaration (FF.1)" => [
@@ -372,6 +372,11 @@
expect(false).to produce(true, @debug)
}.to raise_error("Table creation failed with errors")
expect(ebnf.errors.to_s).to match(expected)
+
+ sio = StringIO.new
+ ebnf.to_ruby(sio)
+ sio.rewind
+ expect(sio.read).to match(/Note, grammar has errors/)
end
end
end
diff --git a/spec/native_spec.rb b/spec/native_spec.rb
new file mode 100644
index 0000000..8c71fea
--- /dev/null
+++ b/spec/native_spec.rb
@@ -0,0 +1,137 @@
+# coding: utf-8
+$:.unshift "."
+require 'spec_helper'
+require 'ebnf'
+require 'sxp'
+
+describe EBNF::Native do
+ let(:logger) {RDF::Spec.logger}
+ after(:each) do |example|
+ puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError)
+ end
+
+ context "rule variations" do
+ {
+ "legal rule name": [
+ 'rulename ::= "foo"',
+ %{((rule rulename (seq "foo")))}
+ ],
+ "prolog": [
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*},
+ %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}
+ ],
+ "alternation": [
+ %{[2] declaration ::= '@terminals' | '@pass'},
+ %{((rule declaration "2" (alt "@terminals" "@pass")))},
+ ],
+ "postfix": [
+ %{[9] postfix ::= primary ( [?*+] )?},
+ %{((rule postfix "9" (seq primary (opt (range "?*+")))))},
+ ],
+ "diff": [
+ %{[18] STRING2 ::= "'" (CHAR - "'")* "'"},
+ %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))},
+ ],
+ "IRIREF": [
+ %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'),
+ %{((terminal IRIREF "18"
+ (seq "<"
+ (star
+ (alt
+ (diff (range "^<>\\\"{}|^`") (range "#x00-#x20"))
+ UCHAR))
+ ">")))},
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+
+ context "without rule identifiers" do
+ {
+ "prolog": [
+ %{Prolog ::= BaseDecl? PrefixDecl*},
+ %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))}
+ ],
+ "alternation": [
+ %{declaration ::= '@terminals' | '@pass'},
+ %{((rule declaration (alt "@terminals" "@pass")))},
+ ],
+ "postfix": [
+ %{postfix ::= primary ( [?*+] )?},
+ %{((rule postfix (seq primary (opt (range "?*+")))))},
+ ],
+ "diff": [
+ %{STRING2 ::= "'" (CHAR - "'")* "'"},
+ %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))},
+ ],
+ "IRIREF": [
+ %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'),
+ %{((terminal IRIREF
+ (seq "<"
+ (star
+ (alt
+ (diff (range "^<>\\\"{}|^`") (range "#x00-#x20"))
+ UCHAR))
+ ">")))},
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+ end
+ end
+
+ describe "#expression" do
+ {
+ "'abc' def" => %{(seq "abc" def)},
+ %{[0-9]} => %{(range "0-9")},
+ %{#x00B7} => %{(hex "#x00B7")},
+ %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")},
+ %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))},
+ %{a b c} => %{(seq a b c)},
+ %{a? b c} => %{(seq (opt a) b c)},
+ %{a - b} => %{(diff a b)},
+ %{(a - b) - c} => %{(diff (diff a b) c)},
+ %{a b? c} => %{(seq a (opt b) c)},
+ %{a | b | c} => %{(alt a b c)},
+ %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))},
+ %{foo | x xlist} => %{(alt foo (seq x xlist))},
+ %{a | (b - c)} => %{(alt a (diff b c))},
+ %{a b | c d} => %{(alt (seq a b) (seq c d))},
+ %{BaseDecl? PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))},
+ %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} =>
+ %{(alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))},
+ %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} =>
+ %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")}
+ }.each do |input, expected|
+ it "given #{input.inspect} produces #{expected}" do
+ rule = parse("rule ::= #{input}").ast.first
+ expect(rule.expr.to_sxp).to produce(expected, @debug)
+ end
+ end
+ end
+
+ context "illegal syntax" do
+ {
+ "diff missing second operand": %{rule ::= a -},
+ "unrecognized terminal" => %{rule ::= %foo%},
+ }.each do |title, input|
+ it title do
+ expect {parse(input)}.to raise_error(SyntaxError)
+ end
+ end
+ end
+
+ it "parses EBNF grammar" do
+ gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))
+ expect(gram).to be_valid
+ end
+
+ def parse(input, **options)
+ @debug = []
+ EBNF.parse(input, debug: @debug, format: :native, **options)
+ end
+end
diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb
index 1da0f06..c72935b 100644
--- a/spec/parser_spec.rb
+++ b/spec/parser_spec.rb
@@ -4,73 +4,147 @@
require 'ebnf'
require 'sxp'
-describe EBNF::Base do
- describe "#ruleParts" do
+describe EBNF::Parser do
+ let(:logger) {RDF::Spec.logger}
+ after(:each) do |example|
+ puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError)
+ end
+
+ context "rule variations" do
{
- %{[2] Prolog ::= BaseDecl? PrefixDecl*} =>
- %{(rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl)))},
- %{[2] declaration ::= '@terminals' | '@pass'} =>
- %{(rule declaration "2" (alt "@terminals" "@pass"))},
- %{[9] postfix ::= primary ( [?*+] )?} =>
- %{(rule postfix "9" (seq primary (opt (range "?*+"))))},
- %{[18] STRING2 ::= "'" (CHAR - "'")* "'"} =>
- %{(terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'"))},
- }.each do |input, expected|
- it "given #{input.inspect} produces #{expected}" do
- expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug)
+ "legal rule name": [
+ 'rulename ::= "foo"',
+ %{((rule rulename (seq "foo")))}
+ ],
+ "prolog": [
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*},
+ %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}
+ ],
+ "alternation": [
+ %{[2] declaration ::= '@terminals' | '@pass'},
+ %{((rule declaration "2" (alt "@terminals" "@pass")))},
+ ],
+ "postfix": [
+ %{[9] postfix ::= primary ( [?*+] )?},
+ %{((rule postfix "9" (seq primary (opt (range "?*+")))))},
+ ],
+ "diff": [
+ %{[18] STRING2 ::= "'" (CHAR - "'")* "'"},
+ %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))},
+ ],
+ "IRIREF": [
+ %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'),
+ %{((terminal IRIREF "18"
+ (seq "<"
+ (star
+ (alt
+ (diff (range "^<>\\\"{}|^`") (range "#x00-#x20"))
+ UCHAR))
+ ">")))},
+ ],
+ "minimal whitespace": [
+ %{[xx]minimal::=whitespace[yy]whitespace::=PASS},
+ %{((rule minimal "xx" (seq whitespace (range "yy")))
+ (rule whitespace (seq PASS)))}
+ ]
+ }.each do |title, (input, expect)|
+ it title do
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
+ end
+
+ context "without rule identifiers" do
+ {
+ "prolog": [
+ %{Prolog ::= BaseDecl? PrefixDecl*},
+ %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))}
+ ],
+ "alternation": [
+ %{declaration ::= '@terminals' | '@pass'},
+ %{((rule declaration (alt "@terminals" "@pass")))},
+ ],
+ "postfix": [
+ %{postfix ::= primary ( [?*+] )?},
+ %{((rule postfix (seq primary (opt (range "?*+")))))},
+ ],
+ "diff": [
+ %{STRING2 ::= "'" (CHAR - "'")* "'"},
+ %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))},
+ ],
+ "IRIREF": [
+ %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'),
+ %{((terminal IRIREF
+ (seq "<"
+ (star
+ (alt
+ (diff (range "^<>\\\"{}|^`") (range "#x00-#x20"))
+ UCHAR))
+ ">")))},
+ ],
+ }.each do |title, (input, expect)|
+ it title do
+ expect(parse(input).to_sxp).to produce(expect, logger)
+ end
end
end
end
-
+
describe "#expression" do
{
- "'abc' def" => %{((seq "abc" def) "")},
- %{[0-9]} => %{((range "0-9") "")},
- %{#x00B7} => %{((hex "#x00B7") "")},
- %{[#x0300-#x036F]} => %{((range "#x0300-#x036F") "")},
- %{[^<>'{}|^`]-[#x00-#x20]} => %{((diff (range "^<>'{}|^`") (range "#x00-#x20")) "")},
- %{a b c} => %{((seq a b c) "")},
- %{a? b c} => %{((seq (opt a) b c) "")},
- %(a - b) => %{((diff a b) "")},
- %((a - b) - c) => %{((diff (diff a b) c) "")},
- %(a b? c) => %{((seq a (opt b) c) "")},
- %(a | b | c) => %{((alt a b c) "")},
- %(a? b+ c*) => %{((seq (opt a) (plus b) (star c)) "")},
- %( | x xlist) => %{((alt (seq ()) (seq x xlist)) "")},
- %(a | (b - c)) => %{((alt a (diff b c)) "")},
- %(a b | c d) => %{((alt (seq a b) (seq c d)) "")},
- %{a) b c} => %{(a " b c")},
- %(BaseDecl? PrefixDecl*) => %{((seq (opt BaseDecl) (star PrefixDecl)) "")},
- %(NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]) =>
- %{((alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040")) "")}
+ "'abc' def" => %{(seq "abc" def)},
+ %{[0-9]} => %{(range "0-9")},
+ %{#x00B7} => %{(hex "#x00B7")},
+ %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")},
+ %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))},
+ %{a b c} => %{(seq a b c)},
+ %{a? b c} => %{(seq (opt a) b c)},
+ %{a - b} => %{(diff a b)},
+ %{(a - b) - c} => %{(diff (diff a b) c)},
+ %{a b? c} => %{(seq a (opt b) c)},
+ %{a | b | c} => %{(alt a b c)},
+ %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))},
+ %{foo | x xlist} => %{(alt foo (seq x xlist))},
+ %{a | (b - c)} => %{(alt a (diff b c))},
+ %{a b | c d} => %{(alt (seq a b) (seq c d))},
+ %{[a-z]} => %{(range "a-z")},
+ %{[a-zA-Z]} => %{(range "a-zA-Z")},
+ %{[#x20-#x22]} => %{(range "#x20-#x22")},
+ %{[abc]} => %{(range "abc")},
+ %{[abc-]} => %{(range "abc-")},
+ %{[#x20#x21#x22]} => %{(range "#x20#x21#x22")},
+ %{BaseDecl? PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))},
+ %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} =>
+ %{(alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))},
+ %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} =>
+ %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")},
}.each do |input, expected|
it "given #{input.inspect} produces #{expected}" do
- expect(ebnf(:expression, input).to_sxp).to produce(expected, @debug)
+ rule = parse("rule ::= #{input}").ast.first
+ expect(rule.expr.to_sxp).to produce(expected, @debug)
end
end
end
- describe "#diff" do
+ context "illegal syntax" do
{
- %{'abc' def} => %{("abc" " def")},
- %{[0-9]} => %{((range "0-9") "")},
- %{#x00B7} => %{((hex "#x00B7") "")},
- %{[#x0300-#x036F]} => %{((range "#x0300-#x036F") "")},
- %{[^<>'{}|^`]-[#x00-#x20]} => %{((diff (range "^<>'{}|^`") (range "#x00-#x20")) "")},
- %{a b c} => %{(a " b c")},
- %{a? b c} => %{((opt a) " b c")},
- %{( [?*+] )?} => %{((opt (range "?*+")) "")},
- %(a - b) => %{((diff a b) "")},
- }.each do |input, expected|
- it "given #{input.inspect} produces #{expected}" do
- expect(ebnf(:diff, input).to_sxp).to produce(expected, @debug)
+ "illegal rule name": %{$rule.name ::= foo},
+ "diff missing second operand": %{rule ::= a -},
+ "unrecognized terminal" => %{rule ::= %foo%},
+ "unopened paren" => %{rule ::= a) b c}
+ }.each do |title, input|
+ it title do
+ expect {parse(input)}.to raise_error(SyntaxError)
end
end
end
-
- def ebnf(method, value, **options)
+
+ it "parses EBNF grammar" do
+ gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))
+ expect(gram).to be_valid
+ end
+
+ def parse(input, **options)
@debug = []
- options = {debug: @debug}.merge(options)
- EBNF::Base.new("", **options).send(method, value)
+ EBNF.parse(input, debug: @debug, format: :ebnf, **options)
end
end
diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb
index de7cb26..b15964b 100644
--- a/spec/peg/data/parser.rb
+++ b/spec/peg/data/parser.rb
@@ -26,14 +26,6 @@ class EBNFPegParser
terminal(:HEX, HEX)
- terminal(:ENUM, ENUM, unescape: true) do |value|
- [:range, value[1..-2]]
- end
-
- terminal(:O_ENUM, O_ENUM, unescape: true) do |value|
- [:range, value[1..-2]]
- end
-
terminal(:RANGE, RANGE, unescape: true) do |value|
[:range, value[1..-2]]
end
@@ -52,17 +44,23 @@ class EBNFPegParser
terminal(:POSTFIX, POSTFIX)
+ production(:ebnf) do |input|
+ # Cause method_missing to invoke something in our context
+ to_sxp
+ end
+
production(:declaration, clear_packrat: true) do |value, data, callback|
# current contains a declaration.
# Invoke callback
- callback.call(:terminal) if value == '@terminals'
+ callback.call(:terminals) if value == '@terminals'
end
+ start_production(:rule, as_hash: true)
production(:rule, clear_packrat: true) do |value, data, callback|
# current contains an expression.
# Invoke callback
- id, sym = value.first[:LHS]
- expression = value.last[:expression]
+ id, sym = value[:LHS]
+ expression = value[:expression]
callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
end
@@ -86,11 +84,12 @@ class EBNFPegParser
value.length == 1 ? value.first : ([:seq] + value)
end
+ start_production(:diff, as_hash: true)
production(:diff) do |value|
- if value.last[:_diff_1]
- [:diff, value.first[:postfix], value.last[:_diff_1]]
+ if value[:_diff_1]
+ [:diff, value[:postfix], value[:_diff_1]]
else
- value.first[:postfix]
+ value[:postfix]
end
end
@@ -98,13 +97,14 @@ class EBNFPegParser
value.last[:postfix] if value
end
+ start_production(:postfix, as_hash: true)
production(:postfix) do |value|
# Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively
- case value.last[:_postfix_1]
- when "*" then [:star, value.first[:primary]]
- when "+" then [:plus, value.first[:primary]]
- when "?" then [:opt, value.first[:primary]]
- else value.first[:primary]
+ case value[:_postfix_1]
+ when "*" then [:star, value[:primary]]
+ when "+" then [:plus, value[:primary]]
+ when "?" then [:opt, value[:primary]]
+ else value[:primary]
end
end
@@ -112,9 +112,10 @@ class EBNFPegParser
Array(value).length > 2 ? value[1][:expression] : value
end
+ start_production(:pass, as_hash: true)
production(:pass) do |value, data, callback|
# Invoke callback
- callback.call(:pass, value.last[:expression])
+ callback.call(:pass, value[:expression])
end
# ## Parser invocation.
@@ -142,9 +143,9 @@ def initialize(input, **options, &block)
**options
) do |context, *data|
rule = case context
- when :terminal
+ when :terminals
parsing_terminals = true
- next
+ rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
when :pass
rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass)
when :rule
@@ -161,6 +162,6 @@ def initialize(input, **options, &block)
def to_sxp
require 'sxp' unless defined?(SXP)
# Output rules as a formatted S-Expression
- SXP::Generator.string(@ast.sort_by{|r| r.id.to_f}.map(&:for_sxp))
+ SXP::Generator.string(@ast.map(&:for_sxp))
end
end
diff --git a/spec/peg/parser_spec.rb b/spec/peg/parser_spec.rb
index 1df502f..5fffbc5 100644
--- a/spec/peg/parser_spec.rb
+++ b/spec/peg/parser_spec.rb
@@ -81,9 +81,10 @@ class PegParserTest
{
"" => %r{syntax error, expecting "0-9", :integer },
- "10 x 1" => %r{syntax error, expecting "0-9", "\+", :operator},
+ "10 x 1" => %r{syntax error, expecting "\+", :operator},
"1-1" => %r{syntax error, expecting "0-9", "\+", :operator},
"foo" => %r{syntax error, expecting "0-9", :integer},
+ "3 1 + 2" => %r{syntax error, expecting "\+", :operator}
}.each do |input, expected|
it "fails to parse #{input.inspect} to #{expected.inspect}" do
expect {
diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb
index 40e8aed..e3838c1 100644
--- a/spec/peg/rule_spec.rb
+++ b/spec/peg/rule_spec.rb
@@ -29,6 +29,16 @@
input: "C",
expect: :unmatched
},
+ "(not A) with 'A'" => {
+ rule: [:not, "A"],
+ input: "A",
+ expect: :unmatched
+ },
+ "(not A) with 'B'" => {
+ rule: [:not, "A"],
+ input: "B",
+ expect: nil
+ },
"(opt A) with 'A'" => {
rule: [:opt, "A"],
input: "A",
@@ -123,13 +133,148 @@
it name do
rule = EBNF::Rule.new(:rule, "0", params[:rule]).extend(EBNF::PEG::Rule)
rule.parser = parser
- expect(parser).to receive(:onStart).with(Symbol)
+ expect(parser).to receive(:onStart).with(Symbol).and_return({})
expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect])
expect(parser).not_to receive(:onTerminal).with(Symbol)
expect(rule.parse(EBNF::LL1::Scanner.new(params[:input]))).to eql(params[:expect])
end
end
+
+ context "with as_hash: true" do
+ {
+ "(alt 'A' 'B') with 'A'" => {
+ rule: [:alt, "A", "B"],
+ input: "A",
+ expect: "A"
+ },
+ "(alt 'A' 'B') with ' A '" => {
+ rule: [:alt, "A", "B"],
+ input: " A ",
+ expect: "A"
+ },
+ "(alt 'A' 'B') with 'B'" => {
+ rule: [:alt, "A", "B"],
+ input: "B",
+ expect: "B"
+ },
+ "(alt 'A' 'B') with 'C'" => {
+ rule: [:alt, "A", "B"],
+ input: "C",
+ expect: :unmatched
+ },
+ "(not A) with 'A'" => {
+ rule: [:not, "A"],
+ input: "A",
+ expect: :unmatched
+ },
+ "(not A) with 'B'" => {
+ rule: [:not, "A"],
+ input: "B",
+ expect: nil
+ },
+ "(opt A) with 'A'" => {
+ rule: [:opt, "A"],
+ input: "A",
+ expect: "A"
+ },
+ "(opt A) with 'A' and whitespace" => {
+ rule: [:opt, "A"],
+ input: " A",
+ expect: "A"
+ },
+ "(opt A) with 'B'" => {
+ rule: [:opt, "A"],
+ input: "B",
+ expect: nil
+ },
+ "(plus A) with ''" => {
+ rule: [:plus, "A"],
+ input: "",
+ expect: :unmatched
+ },
+ "(plus A) with 'A'" => {
+ rule: [:plus, "A"],
+ input: "A",
+ expect: %w(A)
+ },
+ "(plus A) with 'A B'" => {
+ rule: [:plus, "A"],
+ input: "A B",
+ expect: %w(A)
+ },
+ "(plus A) with 'AAA'" => {
+ rule: [:plus, "A"],
+ input: "AAA",
+ expect: %w(A A A)
+ },
+ "(plus A) with ' A A A '" => {
+ rule: [:plus, "A"],
+ input: " A A A ",
+ expect: %w(A A A)
+ },
+ "(seq 'A' 'B')" => {
+ rule: [:seq, "A", "B"],
+ input: "A B",
+ expect: {A: "A", B: "B"}
+ },
+ "(seq 'A' 'B') with no whitespace" => {
+ rule: [:seq, "A", "B"],
+ input: "AB",
+ expect: {A: "A", B: "B"}
+ },
+ "(seq 'A' 'B') with added whitespace" => {
+ rule: [:seq, "A", "B"],
+ input: " A B ",
+ expect: {A: "A", B: "B"}
+ },
+ "(seq 'A' 'B') with 'A'" => {
+ rule: [:seq, "A", "B"],
+ input: " A ",
+ expect: :unmatched
+ },
+ "(seq 'A' 'B') with 'AC'" => {
+ rule: [:seq, "A", "B"],
+ input: "AC",
+ expect: :unmatched
+ },
+ "(star A) with ''" => {
+ rule: [:star, "A"],
+ input: "",
+ expect: []
+ },
+ "(star A) with 'A'" => {
+ rule: [:star, "A"],
+ input: "A",
+ expect: %w(A)
+ },
+ "(star A) with 'A B'" => {
+ rule: [:star, "A"],
+ input: "A B",
+ expect: %w(A)
+ },
+ "(star A) with 'AAA'" => {
+ rule: [:star, "A"],
+ input: "AAA",
+ expect: %w(A A A)
+ },
+ "(star A) with ' A A A '" => {
+ rule: [:star, "A"],
+ input: " A A A ",
+ expect: %w(A A A)
+ },
+ }.each do |name, params|
+ it name do
+ rule = EBNF::Rule.new(:rule, "0", params[:rule]).extend(EBNF::PEG::Rule)
+ rule.parser = parser
+ expect(parser).to receive(:onStart).with(Symbol).and_return({as_hash: true})
+ expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect])
+ expect(parser).not_to receive(:onTerminal).with(Symbol)
+
+ expect(rule.parse(EBNF::LL1::Scanner.new(params[:input]))).to eql(params[:expect])
+ end
+ end
+ end
end
context "terminal rules" do
@@ -169,6 +314,24 @@
input: "B",
expect: :unmatched
},
+ '(istr "foo") with "foo"' => {
+ rule: [:istr, "foo"],
+
+ input: "foo",
+ expect: "foo"
+ },
+ '(istr "foo") with "FOO"' => {
+ rule: [:istr, "foo"],
+
+ input: "FOO",
+ expect: "FOO"
+ },
+ '(istr "fOo") with "FoO"' => {
+ rule: [:istr, "fOo"],
+
+ input: "FoO",
+ expect: "FoO"
+ },
"(range A-C) with 'A'" => {
rule: [:range, "A-C"],
input: "A",
@@ -253,7 +416,7 @@
it name do
rule = EBNF::Rule.new(:rule, "0", params[:rule], kind: :terminal).extend(EBNF::PEG::Rule)
rule.parser = parser
- expect(parser).to receive(:onStart).with(Symbol)
+ expect(parser).to receive(:onStart).with(Symbol).and_return({})
expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect])
expect(parser).not_to receive(:onTerminal)
expect(parser).to receive(:find_terminal_regexp).with(:rule)
diff --git a/spec/peg_spec.rb b/spec/peg_spec.rb
index e940757..a354203 100644
--- a/spec/peg_spec.rb
+++ b/spec/peg_spec.rb
@@ -14,15 +14,13 @@
%{
[9] primary ::= HEX
| RANGE
- | ENUM
| O_RANGE
- | O_ENUM
| STRING1
| STRING2
| '(' expression ')'
} =>
- %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 _primary_1))
+ %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1))
(rule _primary_1 "9.1" (seq "(" expression ")")))},
%{[1] start ::= A B C} =>
%{((rule start "1" (seq A B C)))},
diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb
index 00c630f..36b2d5e 100644
--- a/spec/rule_spec.rb
+++ b/spec/rule_spec.rb
@@ -6,70 +6,309 @@
describe EBNF::Rule do
let(:debug) {[]}
- let(:ebnf) {EBNF.parse("", debug: debug)}
- subject {EBNF::Rule.new("rule", "0", [], ebnf: ebnf)}
+ let(:ebnf) {EBNF.parse(File.read(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))} # read eagerly so no file handle is left open
+ subject {EBNF::Rule.new(:rule, "0", [:seq, :foo])}
- describe "#ttl_expr" do
+ describe ".from_sxp" do
+ context "accepts valid variations" do
+ {
+ "ebnf[1]": [
+ %{(rule ebnf "1" (star (alt declaration rule)))},
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]])
+ ],
+ "ebnf[1] parsed": [
+ [:rule, :ebnf, "1", [:star, [:alt, :declaration, :rule]]],
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]], kind: :rule)
+ ],
+ "pass": [
+ %{(pass _pass (plus (range "#x9#xA#xD#x20")))},
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x9#xA#xD#x20"]], kind: :pass)
+ ],
+ "alt": [
+ %{(rule alt (alt a b c))},
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule)
+ ],
+ "diff": [
+ %{(terminal R_CHAR "21" (diff CHAR "]"))},
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal)
+ ],
+ "istr": [
+ %{(terminal nc (istr "foo"))},
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal)
+ ],
+ "not": [
+ %{(rule _a_1 "n.1" (not op1))},
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule)
+ ],
+ "opt": [
+ %{(rule _diff_1 "7.1" (opt _diff_2))},
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule)
+ ],
+ "plus": [
+ %{(rule seq "6" (plus diff))},
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule)
+ ],
+ "rept": [
+ %{(rule rept "6" (rept 1 "*" diff))},
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff])
+ ],
+ "rept m.n": [
+ %{(rule rept "6" (rept 3 5 diff))},
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff])
+ ],
+ "seq": [
+ %{(rule seq (seq a b c))},
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule)
+ ],
+ "star": [
+ %{(rule _alt_1 "5.1" (star _alt_2))},
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule)
+ ]
+ }.each do |title, (sxp, expected)|
+ it title do
+ res = EBNF::Rule.from_sxp(sxp)
+ expect(res).to eq expected
+ end
+ end
+ end
+
+ context "rejects invalid variations" do
+ {
+ "alt (empty)": %{(rule alt (alt))},
+ "diff (empty)": %{(terminal R_CHAR "21" (diff))},
+ "diff (one)": %{(terminal R_CHAR "21" (diff CHAR))},
+ "diff (three)": %{(terminal R_CHAR "21" (diff CHAR "]" ","))},
+ "hex (empty)": %{(terminal hex (hex))},
+ "hex (two)": %{(terminal hex (hex #x01 #x02))},
+ "istr (empty)": %{(terminal nc (istr))},
+ "istr (two)": %{(terminal nc (istr "foo" "bar"))},
+ "not (empty)": %{(rule _a_1 "n.1" (not))},
+ "not (two)": %{(rule _a_1 "n.1" (not op1 op2))},
+ "opt (empty)": %{(rule _diff_1 "7.1" (opt))},
+ "plus (empty)": %{(rule seq "6" (plus))},
+ "plus (two)": %{(rule seq "6" (plus diff extra))},
+ "rept (empty)": %{(rule rept "6" (rept))},
+ "rept (one)": %{(rule rept "6" (rept 1))},
+ "rept (two)": %{(rule rept "6" (rept 1 "*"))},
+ "rept (four)": %{(rule rept "6" (rept 1 "*" diff extra))},
+ "rept (float min)": %{(rule rept "6" (rept 1.1 1 diff))},
+ "rept (negative min)": %{(rule rept "6" (rept -1 1 diff))},
+ "rept (float max)": %{(rule rept "6" (rept 1 1.1 diff))},
+ "rept (negative max)": %{(rule rept "6" (rept 1 -1 diff))},
+ "star (empty)": %{(rule _alt_1 "5.1" (star))},
+ "star (two)": %{(rule _alt_1 "5.1" (star diff extra))},
+ "not op": %{(rule _bad nil (_bad))}
+ }.each do |title, sxp| # each value is a bare S-expression string; there is no expected result to destructure
+ it title do
+ expect {EBNF::Rule.from_sxp(sxp)}.to raise_error(ArgumentError)
+ end
+ end
+ end
+ end
+
+ describe "#to_sxp" do
{
- "ebnf[1]" => [
- [:star, [:alt, :declaration, :rule]],
- %{g:star [ g:alt ( :declaration :rule ) ] .}
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ %{(rule ebnf "1" (star (alt declaration rule)))},
],
- "ebnf[2]" => [
- [:alt, "@terminals", "@pass"],
- %{g:alt ( "@terminals" "@pass" ) .}
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ %{(pass _pass (plus (range "#x20\\\\t\\\\r\\\\n")))},
],
- "ebnf[5]" => [
- :alt,
- %{g:seq ( :alt ) .}
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ %{(rule alt (alt a b c))},
],
- "ebnf[9]" => [
- [:seq, :primary, [:opt, [:range, "?*+"]]],
- %{g:seq ( :primary [ g:opt [ re:matches "[?*+]" ] ] ) .}
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ %{(terminal R_CHAR "21" (diff CHAR "]"))},
],
- "IRIREF" => [
- [:seq, "<", [:star, [:alt, [:range, "^#x00-#x20<>\"{}|^`\\"], :UCHAR]], ">"],
- %{g:seq ( "<" [ g:star [ g:alt ( [ re:matches "[^\\\\u0000-\\\\u0020<>\\\"{}|^`\\\\]" ] :UCHAR ) ] ] ">" ) .}
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ %{(terminal nc (istr "foo"))},
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ %{(rule _a_1 "n.1" (not op1))},
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ %{(rule _diff_1 "7.1" (opt _diff_2))},
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ %{(rule seq "6" (plus diff))},
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ %{(rule rept "6" (rept 1 "*" diff))},
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ %{(rule rept "6" (rept 3 5 diff))},
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ %{(rule seq (seq a b c))},
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ %{(rule _alt_1 "5.1" (star _alt_2))},
]
- }.each do |title, (expr, expected)|
+ }.each do |title, (rule, sxp)|
it title do
- res = subject.send(:ttl_expr, expr, "g", 0, false)
- res.each {|r| expect(r).to be_a(String)}
+ expect(rule.to_sxp).to eq sxp
+ end
+ end
+ end
- expect(res.join("\n").gsub(/\s+/, ' ')).to produce(expected, debug)
+ describe "#to_ttl" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ %{
+ :ebnf rdfs:label "ebnf";
+ dc:identifier "1";
+ g:star
+ [ g:alt (
+ :declaration
+ :rule
+ ) ] .},
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ %{
+ :_pass rdfs:label "_pass";
+ g:plus [ re:matches "[\\\\u0020\\\\t\\\\r\\\\n]" ] .},
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ %{
+ :alt rdfs:label "alt";
+ g:alt ( :a :b :c ) .},
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ %{
+ :R_CHAR rdfs:label "R_CHAR";
+ dc:identifier "21";
+ re:diff ( :CHAR "]" ) .},
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ %{
+ :nc rdfs:label "nc";
+ re:matches "foo" .},
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ %{
+ :_a_1 rdfs:label "_a_1";
+ dc:identifier "n.1";
+ g:not :op1 .},
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ %{
+ :_diff_1 rdfs:label "_diff_1";
+ dc:identifier "7.1";
+ g:opt :_diff_2 .},
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ %{
+ :seq rdfs:label "seq";
+ dc:identifier "6";
+ g:plus :diff .},
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ %{
+ :rept rdfs:label "rept";
+ dc:identifier "6";
+ g:min 1;
+ g:max "*";
+ g:rept :diff .},
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ %{
+ :rept rdfs:label "rept";
+ dc:identifier "6";
+ g:min 3;
+ g:max 5;
+ g:rept :diff .},
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ %{
+ :seq rdfs:label "seq";
+ g:seq ( :a :b :c ) .},
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ %{
+ :_alt_1 rdfs:label "_alt_1";
+ dc:identifier "5.1";
+ g:star :_alt_2 .},
+ ]
+ }.each do |title, (rule, ttl)|
+ it title do
+ expect(rule.to_ttl.gsub(/\s+/m, " ")).to eq ttl.gsub(/\s+/m, " ")
end
end
end
-
- describe "#cclass" do
+
+ describe "#to_ruby" do
{
- "passes normal stuff" => [
- %{^<>'{}|^`},
- %{[^<>'{}|^`]}
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ %{EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]])},
],
- "turns regular hex range into unicode range" => [
- %{#x0300-#x036F},
- %{[\\u0300-\\u036F]}
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ %{EBNF::Rule.new(:_pass, nil, [:plus, [:range, \"#x20\\\\t\\\\r\\\\n\"]], kind: :pass)},
],
- "turns short hex range into unicode range" => [
- %{#xC0-#xD6},
- %{[\\u00C0-\\u00D6]}
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ %{EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c])},
],
- "turns 3 char hex range into unicode range" => [
- %{#x370-#x37D},
- %{[\\u0370-\\u037D]}
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ %{EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal)},
],
- "turns long hex range into unicode range" => [
- %{#x000300-#x00036F},
- %{[\\U00000300-\\U0000036F]}
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ %{EBNF::Rule.new(:_a_1, "n.1", [:not, :op1])},
],
- "turns 5 char hex range into unicode range" => [
- %{#x00370-#x0037D},
- %{[\\U00000370-\\U0000037D]}
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ %{EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2])},
],
- }.each do |title, (input, expected)|
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ %{EBNF::Rule.new(:seq, "6", [:plus, :diff])},
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ %{EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff])},
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ %{EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff])},
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ %{EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c])},
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ %{EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2])},
+ ]
+ }.each do |title, (rule, ruby)|
it title do
- expect(subject.send(:cclass, input)).to produce(expected, debug)
+ expect(rule.to_ruby).to eq ruby
end
end
end
@@ -77,13 +316,13 @@
describe "#to_bnf" do
{
"no-rewrite" => [
- [:seq],
- [EBNF::Rule.new(:rule, "0", [:seq])]
+ [:seq, :foo],
+ [EBNF::Rule.new(:rule, "0", [:seq, :foo])]
],
"embedded rule" => [
- [:seq, [:alt]],
+ [:seq, [:alt, :foo]],
[EBNF::Rule.new(:rule, "0", [:seq, :_rule_1]),
- EBNF::Rule.new(:_rule_1, "0.1", [:alt])]
+ EBNF::Rule.new(:_rule_1, "0.1", [:alt, :foo])]
],
"opt rule" => [
[:opt, :foo],
@@ -106,17 +345,18 @@
EBNF::Rule.new(:_rule_1, "0.1", [:alt, :_empty, :_rule_2]),
EBNF::Rule.new(:_rule_2, "0.2", [:seq, :foo, :_rule_1])]
],
- "diff rule" => [
- [:diff, "a", "b"],
- [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"], kind: :terminal)]
- ],
+ # Diff may be a Rule or a Terminal now.
+ #"diff rule" => [
+ # [:diff, "a", "b"],
+ # [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"])]
+ #],
"hex rule" => [
[:hex, "#x00B7"],
[EBNF::Rule.new(:rule, "0", [:hex, "#x00B7"], kind: :terminal)]
],
"range rule" => [
- [:range, "a", "b"],
- [EBNF::Rule.new(:rule, "0", [:range, "a", "b"], kind: :terminal)]
+ [:range, "a"],
+ [EBNF::Rule.new(:rule, "0", [:range, "a"], kind: :terminal)]
],
"ebnf[1]" => [
[:star, [:alt, :declaration, :rule]],
@@ -150,18 +390,31 @@
end
end
end
+
+ context "exceptions" do
+ {
+ "diff" => [:diff, "foo", "foobar"],
+ "not" => [:not, "foo"],
+ "rept" => [:rept, 1, 2, "foo"],
+ }.each do |title, expr|
+ it title do
+ rule = EBNF::Rule.new(:rule, "0", expr)
+ expect {rule.to_bnf}.to raise_error(RuntimeError)
+ end
+ end
+ end
end
describe "#to_peg" do
{
"no-rewrite" => [
- [:seq],
- [EBNF::Rule.new(:rule, "0", [:seq])]
+ [:seq, :foo],
+ [EBNF::Rule.new(:rule, "0", [:seq, :foo])]
],
"embedded rule" => [
- [:seq, [:alt]],
+ [:seq, [:alt, :foo]],
[EBNF::Rule.new(:rule, "0", [:seq, :_rule_1]),
- EBNF::Rule.new(:_rule_1, "0.1", [:alt])]
+ EBNF::Rule.new(:_rule_1, "0.1", [:alt, :foo])]
],
"opt rule" => [
[:opt, :foo],
@@ -183,15 +436,16 @@
],
"diff rule" => [
[:diff, "a", "b"],
- [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"], kind: :terminal)]
+ [EBNF::Rule.new(:rule, "0", [:seq, :_rule_1, "a"]),
+ EBNF::Rule.new(:_rule_1, "0.1", [:not, "b"])]
],
"hex rule" => [
[:hex, "#x00B7"],
[EBNF::Rule.new(:rule, "0", [:hex, "#x00B7"], kind: :terminal)]
],
"range rule" => [
- [:range, "a", "b"],
- [EBNF::Rule.new(:rule, "0", [:range, "a", "b"], kind: :terminal)]
+ [:range, "a"],
+ [EBNF::Rule.new(:rule, "0", [:range, "a"], kind: :terminal)]
],
"ebnf[1]" => [
[:star, [:alt, :declaration, :rule]],
@@ -219,8 +473,586 @@
end
it "extends with EBNF::PEG::Rule" do
- rule = EBNF::Rule.new(:rule, "0", [:seq]).to_peg.first
+ rule = EBNF::Rule.new(:rule, "0", [:seq, :foo]).to_peg.first
expect(rule).to be_a(EBNF::PEG::Rule)
end
end
+
+ describe "#to_regexp" do
+ {
+ hex: [:hex, "#x20", / /],
+ range: [:range, "a-b", /[a-b]/],
+ range2: [:range, "a-zA-Z", /[a-zA-Z]/],
+ range3: [:range, "abc-", /[abc-]/],
+ }.each do |title, (op, exp, regexp)|
+ it title do
+ expect(EBNF::Rule.new(title, nil, [op, exp]).to_regexp).to eql regexp
+ end
+ end
+
+ {
+ istr: ["foo", /foo/ui],
+ }.each do |title, (exp, regexp)|
+ it title, ruby: "!jruby" do
+ expect(EBNF::Rule.new(title, nil, [title, exp]).to_regexp).to eql regexp
+ end
+ end
+
+ it "raises an error for other operation" do
+ expect {EBNF::Rule.new(:seq, nil, [:seq, :a]).to_regexp}.to raise_error(/Can't turn/)
+ end
+ end
+
+ describe "#terminal?" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ false,
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ false,
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ true,
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ true,
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ false,
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ false,
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ false,
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ false,
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ false,
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ false,
+ ]
+ }.each do |title, (rule, bool)|
+ it "#{title} => #{bool.inspect}" do
+ expect(rule.terminal?).to eq bool
+ end
+ end
+ end
+
+ describe "#pass?" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ false,
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ true,
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ false,
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ false,
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ false,
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ false,
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ false,
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ false,
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ false,
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ false,
+ ]
+ }.each do |title, (rule, bool)|
+ it "#{title} => #{bool.inspect}" do
+ expect(rule.pass?).to eq bool
+ end
+ end
+ end
+
+ describe "#rule?" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ true,
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ false,
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ true,
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ false,
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ false,
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ true,
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ true,
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ true,
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ true,
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ true,
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ true,
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ true,
+ ]
+ }.each do |title, (rule, bool)|
+ it "#{title} => #{bool.inspect}" do
+ expect(rule.rule?).to eq bool
+ end
+ end
+ end
+
+ describe "#alt?" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ false,
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ false,
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ true,
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ false,
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ false,
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ false,
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ false,
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ false,
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ false,
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ false,
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ false,
+ ]
+ }.each do |title, (rule, bool)|
+ it "#{title} => #{bool.inspect}" do
+ expect(rule.alt?).to eq bool
+ end
+ end
+ end
+
+ describe "#seq?" do
+ {
+ "ebnf[1]": [
+ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]),
+ false,
+ ],
+ "pass": [
+ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass),
+ false,
+ ],
+ "alt": [
+ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule),
+ false,
+ ],
+ "diff": [
+ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal),
+ false,
+ ],
+ "istr": [
+ EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal),
+ false,
+ ],
+ "not": [
+ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule),
+ false,
+ ],
+ "opt": [
+ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule),
+ false,
+ ],
+ "plus": [
+ EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule),
+ false,
+ ],
+ "rept": [
+ EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]),
+ false,
+ ],
+ "rept m.n": [
+ EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]),
+ false,
+ ],
+ "seq": [
+ EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule),
+ true,
+ ],
+ "star": [
+ EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule),
+ false,
+ ]
+ }.each do |title, (rule, bool)|
+ it "#{title} => #{bool.inspect}" do
+ expect(rule.seq?).to eq bool
+ end
+ end
+ end
+
+ describe "#==" do
+ let(:rule1) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])}
+ let(:rule2) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])}
+ let(:rule3) {EBNF::Rule.new(:bar, nil, [:seq, "FOO"])}
+
+ it "equals itself" do
+ expect(rule1).to eq(rule1)
+ end
+ it "equals an equivalent rule" do
+ expect(rule1).to eq(rule2)
+ end
+ it "does not equal a rule with a different symbol that has the same expression" do
+ expect(rule1).not_to eq(rule3)
+ end
+ end
+
+ describe "#eql?" do
+ let(:rule1) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])}
+ let(:rule2) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])}
+ let(:rule3) {EBNF::Rule.new(:bar, nil, [:seq, "FOO"])}
+
+ it "equals itself" do
+ expect(rule1).to eql(rule1)
+ end
+ it "equals an equivalent rule" do
+ expect(rule1).to eql(rule2)
+ end
+ it "equals a rule with a different symbol that has the same expression" do
+ expect(rule1).to eql(rule3)
+ end
+ end
+
+ describe "#translate_codepoints" do
+ {
+ "#x20" => " ",
+ "#xffff" => "\u{ffff}"
+ }.each do |str, cp|
+ specify {expect(subject.translate_codepoints(str)).to eql(cp)}
+ end
+ end
+
+ describe "#non_terminals" do
+ subject {ebnf}
+ {
+ _pass: [],
+ ebnf: [:declaration, :rule],
+ declaration: [:pass],
+ alt: [:seq],
+ seq: [:diff],
+ diff: [:postfix],
+ postfix: [:primary],
+ primary: [],
+ pass: [],
+ LHS: [],
+ SYMBOL: [],
+ HEX: [],
+ RANGE: [],
+ O_RANGE: [],
+ STRING1: [],
+ STRING2: [],
+ CHAR: [],
+ R_CHAR: [],
+ POSTFIX: [],
+ PASS: []
+ }.each do |sym, expected|
+ it "#{sym} => #{expected.inspect}" do
+ res = subject.ast.find {|r| r.sym == sym}
+ expect(res.non_terminals(subject.ast).map(&:sym)).to eq expected
+ end
+ end
+ end
+
+ describe "#terminals" do
+ subject {ebnf}
+ {
+ _pass: [:PASS],
+ ebnf: [],
+ declaration: ["@terminals"],
+ alt: [],
+ seq: [],
+ diff: [],
+ postfix: [],
+ primary: [:HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, "("],
+ pass: ["@pass"],
+ LHS: ["["],
+ SYMBOL: ["a-z", "A-Z", "0-9", "_", "."],
+ HEX: ["#x"],
+ RANGE: ["["],
+ O_RANGE: ["[^"],
+ STRING1: ['"'],
+ STRING2: ["'"],
+ CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"],
+ R_CHAR: [:CHAR, "]", "-", :HEX],
+ POSTFIX: ["?*+"],
+ PASS: ["#x9#xA#xD#x20", "#", "#x", "//", "/*", "(*"]
+ }.each do |sym, expected|
+ it "#{sym} => #{expected.inspect}" do
+ res = subject.ast.find {|r| r.sym == sym}
+ expect(res.terminals(subject.ast).map {|r| r.is_a?(EBNF::Rule) ? r.sym : r}).to eq expected
+ end
+ end
+ end
+
+ describe "#symbols" do
+ subject {ebnf}
+ {
+ _pass: [:PASS],
+ ebnf: [:declaration, :rule],
+ declaration: [:pass],
+ alt: [:seq],
+ seq: [:diff],
+ diff: [:postfix],
+ postfix: [:primary, :POSTFIX],
+ primary: [:HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :expression],
+ pass: [:expression],
+ LHS: [:SYMBOL],
+ SYMBOL: [],
+ HEX: [],
+ RANGE: [:R_CHAR, :HEX, :LHS],
+ O_RANGE: [:R_CHAR, :HEX],
+ STRING1: [:CHAR],
+ STRING2: [:CHAR],
+ CHAR: [],
+ R_CHAR: [:CHAR, :HEX],
+ POSTFIX: [],
+ PASS: []
+ }.each do |sym, expected|
+ it "#{sym} => #{expected.inspect}" do
+ res = subject.ast.find {|r| r.sym == sym}
+ expect(res.symbols).to eq expected
+ end
+ end
+ end
+
+ describe "#validate!" do
+ let(:gram) {EBNF.parse("a ::= 'b'?")}
+ subject {gram.ast.first}
+
+ {
+ "mixed enum char and hex": [
+ "a ::= [b#x20]",
+ %(In rule a: Range must be of form HEX+ or R_CHAR+: was "b#x20")
+ ],
+ "mixed enum char and hex (2)": [
+ "a ::= [#x20z]",
+ %(In rule a: Range must be of form HEX+ or R_CHAR+: was "#x20z")
+ ],
+ }.each do |name, (rule, message)|
+ it name do
+ expect(EBNF.parse(rule)).to be_valid
+ end
+ end
+
+ {
+ "missing rule": [
+ "a ::= b",
+ /In rule a: No rule found for b/
+ ],
+ "illegal string": [
+ %{a ::= "\u{01}"},
+ /syntax error/
+ ],
+ "empty range": [
+ "a ::= []",
+ /syntax error/
+ ],
+ "mixed range char and hex": [
+ "a ::= [b-#x20]",
+ /Range contains illegal components/
+ ],
+ "mixed range char and hex (2)": [
+ "a ::= [#x20-b]",
+ /Range contains illegal components/
+ ],
+ "incomplete range": [
+ "a ::= [-b]",
+ /syntax error,/
+ ],
+ "extra range": [
+ "a ::= [a-b-c]",
+ /syntax error,/
+ ],
+ }.each do |name, (rule, message)|
+ it name do
+ expect {EBNF.parse(rule, validate: true)}.to raise_error SyntaxError, message
+ end
+ end
+
+ # Validate rules that can only be created through modification
+ {
+ "alt (empty)": [:alt],
+ "diff (empty)": [:diff],
+ "diff (one)": [:diff, 'A'],
+ "diff (three)": [:diff, 'A', 'B', 'C'],
+ "hex (empty)": [:hex],
+ "hex (two)": [:hex, '#x01', '#x02'],
+ "hex (string)": [:hex, 'string'],
+ "istr (empty)": [:istr],
+ "istr (two)": [:istr, 'A', 'B'],
+ "not (empty)": [:not],
+ "not (two)": [:not, 'A', 'B'],
+ "opt (empty)": [:opt],
+ "plus (empty)": [:plus],
+ "plus (two)": [:plus, 'A', 'B'],
+ "rept (empty)": [:rept],
+ "rept (one)": [:rept, 1],
+ "rept (two)": [:rept, 1, 2],
+ "rept (four)": [:rept, 1, 2, 'A', 'B'],
+ "rept (float min)": [:rept, 1.1, 2, 'A'],
+ "rept (negative min)": [:rept, -1, 2, 'A'],
+ "rept (float max)": [:rept, 1, 2.1, 'A'],
+ "rept (negative max)": [:rept, 1, -1, 'A'],
+ "star (empty)": [:star],
+ "star (two)": [:star, 'A', 'B'],
+ "not op": [:bad]
+ }.each do |title, expr|
+ it title do
+ subject.expr = expr
+ expect {subject.validate!(gram.ast)}.to raise_error(SyntaxError)
+ end
+ end
+ end
+
+ describe "#valid?" do
+ subject {EBNF.parse("a ::= b")}
+ it "notes missing rule" do
+ expect(subject.ast.first.valid?(subject.ast)).to be_falsey
+ end
+
+ it "validates EBNF" do
+ grammar = EBNF.parse(File.read(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) # read eagerly so no file handle is left open
+ expect(grammar.ast.first).to be_valid(grammar.ast)
+ end
+ end
+
+ describe "#cclass" do
+ {
+ "passes normal stuff" => [
+ %{^<>'{}|^`},
+ %{[^<>'{}|^`]}
+ ],
+ "turns regular hex range into unicode range" => [
+ %{#x0300-#x036F},
+ %{[\\u0300-\\u036F]}
+ ],
+ "turns short hex range into unicode range" => [
+ %{#xC0-#xD6},
+ %{[\\u00C0-\\u00D6]}
+ ],
+ "turns 3 char hex range into unicode range" => [
+ %{#x370-#x37D},
+ %{[\\u0370-\\u037D]}
+ ],
+ "turns long hex range into unicode range" => [
+ %{#x000300-#x00036F},
+ %{[\\U00000300-\\U0000036F]}
+ ],
+ "turns 5 char hex range into unicode range" => [
+ %{#x00370-#x0037D},
+ %{[\\U00000370-\\U0000037D]}
+ ],
+ }.each do |title, (input, expected)|
+ it title do
+ expect(subject.send(:cclass, input)).to produce(expected, debug)
+ end
+ end
+ end
end
\ No newline at end of file
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 2a2b309..10837a5 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -24,8 +24,18 @@
::RSpec.configure do |c|
c.filter_run focus: true
c.run_all_when_everything_filtered = true
- c.exclusion_filter = {
- ruby: lambda { |version| !(RUBY_VERSION.to_s =~ /^#{version.to_s}/) },
- not_jruby: lambda { RUBY_PLATFORM.to_s != 'jruby'}
- }
+ c.filter_run_excluding ruby: ->(version) do
+ case version.to_s
+ when "!jruby"
+ RUBY_ENGINE == "jruby"
+ when /^> (.*)/
+ !(RUBY_VERSION.to_s > $1)
+ else
+ !(RUBY_VERSION.to_s =~ /^#{version.to_s}/)
+ end
+ end
end
+
+require 'ebnf'
+
+PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))).freeze
\ No newline at end of file
diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb
index 9b6e82f..c14a230 100644
--- a/spec/writer_spec.rb
+++ b/spec/writer_spec.rb
@@ -3,9 +3,43 @@
require 'spec_helper'
require 'ebnf'
require 'sxp'
+require 'nokogiri'
describe EBNF::Writer do
- describe "#initialize" do
+ RSpec::Matchers.define :have_xpath do |path, value|
+ match do |actual|
+ doc = Nokogiri::HTML.parse(actual)
+ return false unless doc
+ @result = doc.at_xpath(path.to_s) rescue false
+ case value
+ when false
+ @result.nil?
+ when true
+ !@result.nil?
+ when Array
+ @result.to_s.split(" ").include?(*value)
+ when Regexp
+ @result.to_s =~ value
+ else
+ @result.to_s == value
+ end
+ end
+
+ failure_message do |actual|
+ msg = "expected that #{path.inspect}\nwould be: #{value.inspect}"
+ msg += "\n was: #{@result}"
+ msg += "\nsource:" + actual
+ msg
+ end
+
+ failure_message_when_negated do |actual|
+ msg = "expected that #{path.inspect}\nwould not be #{value.inspect}"
+ msg += "\nsource:" + actual
+ msg
+ end
+ end
+
+ describe ".string" do
{
prolog: [
%{[2] Prolog ::= BaseDecl? PrefixDecl*},
@@ -22,17 +56,522 @@
end
end
- context "Existing grammars" do
+ describe ".print" do
{
- "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__),
- "Turtle Grammar" => File.expand_path("../../etc/turtle.ebnf", __FILE__)
- }.each do |name, file|
- context name do
- it "outputs grammar as text" do
- expect {EBNF.parse(File.read(file)).to_s}.to_not raise_error
- end
- it "outputs grammar as html" do
- expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error
+ prolog: [
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*},
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*\n}
+ ],
+ }.each do |title, (grammar, plain)|
+ context title do
+ subject {EBNF::Base.new(grammar).ast}
+
+ it "generates plain" do
+ expect {EBNF::Writer.print(*subject)}.to write(plain).to(:output)
+ end
+ end
+ end
+ end
+
+ describe ".html" do
+ {
+ prolog: [
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*},
+ {
+ '//table/@class': "grammar",
+ '//table/tbody/@id': "grammar-productions",
+ '//tbody/tr/@id': "grammar-production-Prolog",
+ '//tbody/tr/td[1]/text()': "[2]",
+ '//tbody/tr/td[2]/code/text()': "Prolog",
+ '//tbody/tr/td[3]/text()': "::=",
+ #'//tbody/tr/td[4]/*/text()': /BaseDecl\? PrefixDecl\*/,
+ }
+ ],
+ }.each do |title, (grammar, xpaths)|
+ context title do
+ subject {EBNF::Writer.html(*EBNF::Base.new(grammar).ast)}
+ xpaths.each do |path, value|
+ specify {is_expected.to have_xpath(path, value)}
+ end
+ end
+ end
+ end
+
+ context "EBNF" do
+ describe "#initialize" do
+ {
+ prolog: [
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*},
+ %{[2] Prolog ::= BaseDecl? PrefixDecl*\n}
+ ],
+ }.each do |title, (grammar, plain)|
+ context title do
+ subject {EBNF::Base.new(grammar).ast}
+
+ it "generates plain" do
+ expect {EBNF::Writer.new(subject)}.to write(plain).to(:output)
+ end
+ end
+ end
+ end
+
+ describe "#format_ebnf" do
+ subject {EBNF::Writer.new([])}
+
+ context "legal expressions" do
+ {
+ "alt": [
+ [:alt, :A, :B],
+ "A | B"
+ ],
+ "diff": [
+ [:diff, :A, :B],
+ "A - B"
+ ],
+ "hex": [
+ [:hex, "#x20"],
+ "#x20"
+ ],
+ "istr": [
+ [:istr, "foo"],
+ %("foo")
+ ],
+ "opt": [
+ [:opt, :A],
+ "A?"
+ ],
+ "plus": [
+ [:plus, :A],
+ "A+"
+ ],
+ "range": [
+ [:range, "a-zA-Z"],
+ "[a-zA-Z]"
+ ],
+ "rept 0 1": [
+ [:rept, 0, 1, :A],
+ "A?"
+ ],
+ "rept 0 *": [
+ [:rept, 0, '*', :A],
+ "A*"
+ ],
+ "rept 1 1": [
+ [:rept, 1, 1, :A],
+ "A"
+ ],
+ "rept 1 *": [
+ [:rept, 1, '*', :A],
+ "A+"
+ ],
+ "rept 1 2": [
+ [:rept, 1, 2, :A],
+ "A A?"
+ ],
+ "rept 1 3": [
+ [:rept, 1, 3, :A],
+ "A (A A?)?"
+ ],
+ "rept 2 *": [
+ [:rept, 2, "*", :A],
+ "A A A*"
+ ],
+ "rept 1 3 (A B)": [
+ [:rept, 1, 3, [:seq, :A, :B]],
+ "(A B) ((A B) (A B)?)?"
+ ],
+ "rept 1 3 (A | B)": [
+ [:rept, 1, 3, [:alt, :A, :B]],
+ "(A | B) ((A | B) (A | B)?)?"
+ ],
+ "star": [
+ [:star, :A],
+ "A*"
+ ],
+ "string '\\r'": [
+ [:seq, "\r"],
+ %{#x0D}
+ ],
+ "string ' '": [
+ [:seq, " "],
+ %{#x20}
+ ],
+ "string 'a'": [
+ [:seq, "a"],
+ %{"a"}
+ ],
+ "string '\"'": [
+ [:seq, '"'],
+ %{'"'}
+ ],
+ "string \"'\"": [
+ [:seq, '\''],
+ %{"'"}
+ ],
+ "string \"\€\"": [
+ [:seq, '€'],
+ %{"€"}
+ ],
+ "n3 path": [
+ [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]],
+ %{pathItem (("!" path) | ("^" path))}
+ ],
+ }.each do |title, (expr, result)|
+ it title do
+ expect(subject.send(:format_ebnf, expr)).to eql result
+ end
+ end
+ end
+
+ context "illegal expressions" do
+ {
+ "string 'a\nb'": [:seq, "a\nb"],
+ }.each do |title, expr|
+ it title do
+ expect {subject.send(:format_ebnf, expr)}.to raise_error RangeError
+ end
+ end
+ end
+ end
+
+ context "Existing grammars" do
+ {
+ "ABNF Grammar" => File.expand_path("../../etc/abnf.ebnf", __FILE__),
+ "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__),
+ "ISO EBNF Grammar" => File.expand_path("../../etc/iso-ebnf.ebnf", __FILE__),
+ "Turtle Grammar" => File.expand_path("../../etc/turtle.ebnf", __FILE__),
+ "SPARQL Grammar" => File.expand_path("../../etc/sparql.ebnf", __FILE__),
+ }.each do |name, file|
+ context name do
+ it "outputs grammar as text" do
+ expect {EBNF.parse(File.read(file)).to_s}.to_not raise_error
+ end
+ it "parses to equivalent rules" do
+ expect(EBNF.parse(File.read(file)).to_sxp).to produce(File.read(file.sub('.ebnf', '.sxp')))
+ end
+ it "outputs grammar as html" do
+ expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error
+ end
+ end
+ end
+ end
+ end
+
+ context "ABNF" do
+ describe "#initialize" do
+ {
+ prolog: [
+ %{rulelist = 1*( rule / (*c-wsp c-nl) )\n},
+ %{rulelist = 1*(rule / (*c-wsp c-nl))\n}
+ ],
+ }.each do |title, (grammar, plain)|
+ context title do
+ subject {EBNF::Base.new(grammar, format: :abnf).ast}
+
+ it "generates plain" do
+ expect {EBNF::Writer.new(subject, format: :abnf)}.to write(plain).to(:output)
+ end
+ end
+ end
+ end
+
+ describe "#format_abnf" do
+ subject {EBNF::Writer.new([])}
+
+ context "legal expressions" do
+ {
+ "alt": [
+ [:alt, :A, :B],
+ "A / B"
+ ],
+ "enum": [
+ [:range, "abc-"],
+ "%d97.98.99.45"
+ ],
+ "hex": [
+ [:hex, "#x20"],
+ "%x20"
+ ],
+ "istr": [
+ [:istr, "foo"],
+ %("foo")
+ ],
+ "opt": [
+ [:opt, :A],
+ "[A]"
+ ],
+ "plus": [
+ [:plus, :A],
+ "1*A"
+ ],
+ "range": [
+ [:range, "a-z"],
+ "%d97-122"
+ ],
+ "range 2": [
+ [:range, "a-zA-Z"],
+ %{(%d97-122 / %d65-90)}
+ ],
+ "rept 0 1": [
+ [:rept, 0, 1, :A],
+ "0*1A"
+ ],
+ "rept 0 *": [
+ [:rept, 0, '*', :A],
+ "*A"
+ ],
+ "rept 1 1": [
+ [:rept, 1, 1, :A],
+ "1A"
+ ],
+ "rept 1 *": [
+ [:rept, 1, '*', :A],
+ "1*A"
+ ],
+ "rept 1 2": [
+ [:rept, 1, 2, :A],
+ "1*2A"
+ ],
+ "rept 1 3": [
+ [:rept, 1, 3, :A],
+ "1*3A"
+ ],
+ "rept 2 *": [
+ [:rept, 2, "*", :A],
+ "2*A"
+ ],
+ "rept 1 3 (A B)": [
+ [:rept, 1, 3, [:seq, :A, :B]],
+ "1*3(A B)"
+ ],
+ "rept 1 3 (A | B)": [
+ [:rept, 1, 3, [:alt, :A, :B]],
+ "1*3(A / B)"
+ ],
+ "star": [
+ [:star, :A],
+ "*A"
+ ],
+ "string '\\r'": [
+ [:seq, "\r"],
+ %{%x0D}
+ ],
+ "string ' '": [
+ [:seq, " "],
+ %{" "}
+ ],
+ "string 'a'": [
+ [:seq, "a"],
+ %{"a"}
+ ],
+ "string '\"'": [
+ [:seq, '"'],
+ %{%x22}
+ ],
+ "string \"'\"": [
+ [:seq, '\''],
+ %{"'"}
+ ],
+ "string \"\€\"": [
+ [:seq, '€'],
+ %{%x20AC}
+ ],
+ "n3 path": [
+ [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]],
+ %{pathItem (("!" path) / ("^" path))}
+ ],
+ }.each do |title, (expr, result)|
+ it title do
+ expect(subject.send(:format_abnf, expr)).to eql result
+ end
+ end
+ end
+
+ context "illegal expressions" do
+ {
+ "[^abc]": [:range, "^abc"],
+ "A - B": [:diff, :A, :B],
+ }.each do |title, expr|
+ it title do
+ expect {subject.send(:format_abnf, expr)}.to raise_error RangeError
+ end
+ end
+ end
+ end
+
+ context "Existing grammars" do
+ {
+ "ABNF Grammar" => File.expand_path("../../etc/abnf.abnf", __FILE__),
+ "HTTP Grammar" => File.expand_path("../../examples/abnf/examples/http.abnf", __FILE__),
+ "JSON Grammar" => File.expand_path("../../examples/abnf/examples/json.abnf", __FILE__),
+ "Postal Address" => File.expand_path("../../examples/abnf/examples/postal-address.abnf", __FILE__),
+ "URI Grammar" => File.expand_path("../../examples/abnf/examples/uri.abnf", __FILE__),
+ }.each do |name, file|
+ context name do
+ it "outputs grammar as text" do
+ expect {EBNF.parse(File.read(file), format: :abnf).to_s(format: :abnf)}.to_not raise_error
+ end
+ it "outputs grammar as html" do
+ expect {EBNF.parse(File.read(file), format: :abnf).to_html(format: :abnf)}.to_not raise_error
+ end
+ end
+ end
+ end
+ end
+
+ context "ISOEBNF" do
+ describe "#initialize" do
+ {
+ prolog: [
+ %{syntax = syntax_rule, {syntax_rule} ;},
+ %{syntax = syntax_rule, {syntax_rule} ;\n}
+ ],
+ }.each do |title, (grammar, plain)|
+ context title do
+ subject {EBNF::Base.new(grammar, format: :isoebnf).ast}
+
+ it "generates plain" do
+ expect {EBNF::Writer.new(subject, format: :isoebnf)}.to write(plain).to(:output)
+ end
+ end
+ end
+ end
+
+ describe "#format_isoebnf" do
+ subject {EBNF::Writer.new([])}
+
+ context "legal expressions" do
+ {
+ "alt": [
+ [:alt, :A, :B],
+ "A | B"
+ ],
+ "diff": [
+ [:diff, :A, :B],
+ "A - B"
+ ],
+ "enum": [
+ [:range, "abc-"],
+ %{("a" | "b" | "c" | "-")}
+ ],
+ "hex": [
+ [:hex, "#x20"],
+ %(" ")
+ ],
+ "istr": [
+ [:istr, "foo"],
+ %("foo")
+ ],
+ "opt": [
+ [:opt, :A],
+ "[A]"
+ ],
+ "plus": [
+ [:plus, :A],
+ "A, {A}"
+ ],
+ "range": [
+ [:range, "a-z"],
+ %{("a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z")}
+ ],
+ "range 2": [
+ [:range, "a-zA-Z"],
+ %{("a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z")}],
+ "rept 0 1": [
+ [:rept, 0, 1, :A],
+ "[A]"
+ ],
+ "rept 0 *": [
+ [:rept, 0, '*', :A],
+ "{A}"
+ ],
+ "rept 1 1": [
+ [:rept, 1, 1, :A],
+ "A"
+ ],
+ "rept 1 *": [
+ [:rept, 1, '*', :A],
+ "A, {A}"
+ ],
+ "rept 1 2": [
+ [:rept, 1, 2, :A],
+ "A, [A]"
+ ],
+ "rept 1 3": [
+ [:rept, 1, 3, :A],
+ "A, [(A, [A])]"
+ ],
+ "rept 2 *": [
+ [:rept, 2, "*", :A],
+ "A, A, {A}"
+ ],
+ "rept 1 3 (A B)": [
+ [:rept, 1, 3, [:seq, :A, :B]],
+ "(A, B), [((A, B), [(A, B)])]"
+ ],
+ "rept 1 3 (A | B)": [
+ [:rept, 1, 3, [:alt, :A, :B]],
+ "(A | B), [((A | B), [(A | B)])]"
+ ],
+ "star": [
+ [:star, :A],
+ "{A}"
+ ],
+ "string ' '": [
+ [:seq, " "],
+ %{" "}
+ ],
+ "string 'a'": [
+ [:seq, "a"],
+ %{"a"}
+ ],
+ "string '\"'": [
+ [:seq, '"'],
+ %{'"'}
+ ],
+ "string \"'\"": [
+ [:seq, '\''],
+ %{"'"}
+ ],
+ "n3 path": [
+ [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]],
+ %{pathItem, (("!", path) | ("^", path))}
+ ],
+ }.each do |title, (expr, result)|
+ it title do
+ expect(subject.send(:format_isoebnf, expr)).to eql result
+ end
+ end
+ end
+
+ context "illegal expressions" do
+ {
+ "[^abc]": [:range, "^abc"],
+ "string '\\r'": [:seq, "\r"],
+ "string \"\€\"": [:seq, '€'],
+ }.each do |title, expr|
+ it title do
+ expect {subject.send(:format_isoebnf, expr)}.to raise_error RangeError
+ end
+ end
+ end
+ end
+
+ context "Existing grammars" do
+ {
+ "ISO EBNF Grammar" => File.expand_path("../../etc/iso-ebnf.isoebnf", __FILE__),
+ "Simple EBNF Grammar" => File.expand_path("../../examples/isoebnf/examples/ebnf.isoebnf", __FILE__),
+ "HTML Grammar" => File.expand_path("../../examples/isoebnf/examples/html.isoebnf", __FILE__),
+ "Pascal Grammar" => File.expand_path("../../examples/isoebnf/examples/pascal.isoebnf", __FILE__),
+ "Postal Address" => File.expand_path("../../examples/isoebnf/examples/postal-address.isoebnf", __FILE__),
+ }.each do |name, file|
+ context name do
+ it "outputs grammar as text" do
+ expect {EBNF.parse(File.read(file), format: :isoebnf).to_s(format: :isoebnf)}.to_not raise_error
+ end
+ it "outputs grammar as html" do
+ expect {EBNF.parse(File.read(file), format: :isoebnf).to_html(format: :isoebnf)}.to_not raise_error
+ end
end
end
end
|