From 6480c843372de0018fc46d31e448d1dee133ad33 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 19 Jun 2020 16:04:16 -0700 Subject: [PATCH 01/50] Better logging support of RDF::Util::Logger is loaded, or errors are being logged to STDOUT/ERR. --- lib/ebnf/ll1/parser.rb | 14 +++++++++++--- lib/ebnf/peg/parser.rb | 17 +++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/lib/ebnf/ll1/parser.rb b/lib/ebnf/ll1/parser.rb index 2970881..234f205 100644 --- a/lib/ebnf/ll1/parser.rb +++ b/lib/ebnf/ll1/parser.rb @@ -581,10 +581,18 @@ def debug(*args) options = args.last.is_a?(Hash) ? args.pop : {} lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno)) level = options.fetch(:level, 0) - depth = options[:depth] || self.depth - args << yield if block_given? - @options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" ")) + + if self.respond_to?(:log_debug) + level = [:debug, :info, :warn, :error, :fatal][level] + log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block) + elsif @options[:logger].respond_to?(:add) + args << yield if block_given? + @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" ")) + elsif @options[:logger].respond_to?(:<<) + args << yield if block_given? 
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ") + end end private diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index 0393036..c5e3824 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -273,6 +273,7 @@ def error(node, message, **options) m += ", production = #{options[:production].inspect}" if options[:production] @error_log << m unless @recovering @recovering = true + require 'byebug'; byebug debug(node, m, level: 3, **options) if options[:raise] || @options[:validate] raise Error.new(m, lineno: lineno, rest: options[:rest], production: options[:production]) @@ -329,15 +330,23 @@ def progress(node, *args, &block) # @option options [Integer] :depth # Recursion depth for indenting output # @yieldreturn [String] additional string appended to `message`. - def debug(*args) + def debug(*args, &block) return unless @options[:logger] options = args.last.is_a?(Hash) ? args.pop : {} lineno = options[:lineno] || (scanner.lineno if scanner) level = options.fetch(:level, 0) - depth = options[:depth] || self.depth - args << yield if block_given? - @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" ")) + + if self.respond_to?(:log_debug) + level = [:debug, :info, :warn, :error, :fatal][level] + log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block) + elsif @options[:logger].respond_to?(:add) + args << yield if block_given? + @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" ")) + elsif @options[:logger].respond_to?(:<<) + args << yield if block_given? + @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ") + end end # Start for production From 9ab4fe18c2f167d7843cba5e7ff6a0572fab8ab7 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 19 Jun 2020 16:18:36 -0700 Subject: [PATCH 02/50] Better logging support of RDF::Util::Logger is loaded, or errors are being logged to STDOUT/ERR. 
--- lib/ebnf/peg/parser.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index c5e3824..786a1a1 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -273,7 +273,6 @@ def error(node, message, **options) m += ", production = #{options[:production].inspect}" if options[:production] @error_log << m unless @recovering @recovering = true - require 'byebug'; byebug debug(node, m, level: 3, **options) if options[:raise] || @options[:validate] raise Error.new(m, lineno: lineno, rest: options[:rest], production: options[:production]) From 98f4ee0e8e9187d2b588c165acbb78490cbcc55c Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 24 Jun 2020 17:04:26 -0700 Subject: [PATCH 03/50] * Add `Base#validate!, Base#valid?`, `Rule#validate! and `Rule#valid?`. * Update Turtle generation to use rule sym as identifier, and id as dc:identifier. * Fix RANGE and O_RANGE EBNF terminals. * Improve code coverage. * Improve error checking in `Rule#initialize`. * Allow `diff` (`a - b`) to be used with non-termainals in PEG parsing. * Added `not` and `rept` operators (not used directly by EBNF grammar, just yet). * Eventually, a syntax such as `A{1*2}` could be at least one and at most to of `A` are matched. 
--- README.md | 12 +- ebnf.gemspec | 1 + etc/ebnf.ebnf | 4 +- etc/ebnf.html | 6 +- etc/ebnf.ll1.sxp | 6 +- etc/ebnf.peg.rb | 6 +- etc/ebnf.peg.sxp | 6 +- etc/ebnf.sxp | 6 +- examples/ebnf-ll1-parser/README.md | 2 +- examples/ebnf-ll1-parser/parser.rb | 4 +- examples/ebnf-peg-parser/README.md | 2 +- examples/ebnf-peg-parser/parser.rb | 4 +- lib/ebnf/base.rb | 65 ++- lib/ebnf/ll1.rb | 5 +- lib/ebnf/parser.rb | 8 +- lib/ebnf/peg.rb | 2 +- lib/ebnf/peg/rule.rb | 102 ++-- lib/ebnf/rule.rb | 355 ++++++++---- spec/base_spec.rb | 73 ++- spec/ebnf_spec.rb | 26 +- spec/ll1/data/parser.rb | 11 + spec/ll1/lexer_spec.rb | 229 +++++--- spec/ll1/parser_spec.rb | 6 + spec/ll1_spec.rb | 5 + spec/parser_spec.rb | 15 +- spec/peg/data/parser.rb | 5 + spec/peg/rule_spec.rb | 10 + spec/rule_spec.rb | 836 ++++++++++++++++++++++++++--- spec/spec_helper.rb | 4 + spec/writer_spec.rb | 17 + 30 files changed, 1460 insertions(+), 373 deletions(-) diff --git a/README.md b/README.md index 7c5fee1..2c1866a 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,8 @@ As LL(1) grammars operate using `alt` and `seq` primitives, allowing for a match Of note in this implementation is that the tokenizer and parser are streaming, so that they can process inputs of arbitrary size. +The _exception operator_ (`A - B`) is only supported on terminals. + See {EBNF::LL1} and {EBNF::LL1::Parser} for further information. ### [PEG][]/[Packrat][] Parser @@ -113,7 +115,7 @@ Within the expression on the right-hand side of a rule, the following expression A | B matches A or B. A - B - matches any string that matches A but does not match B. + matches any string that matches A but does not match B. (Only supported on Terminals in LL(1) BNF). A+ matches one or more occurrences of A. Concatenation has higher precedence than alternation; thus A+ | B+ is identical to (A+) | (B+). 
A* @@ -152,13 +154,17 @@ Different components of an EBNF rule expression are transformed into their own o "string""string" 'string'"string" A (B | C)(seq (A (alt B C))) + A~ extension(not A) A?(opt A) A B(seq A B) A | B(alt A B) - A - B(diff A B) + A - B + (diff A B) for terminals.
+ (seq (not B) A) for non-terminals (PEG parsing only)
A+(plus A) A*(star A) - @pass " "*(pass (star " ")) + A{n*m} extension(rept n m A) + @pass " "*(pass _pass (star " ")) @terminals diff --git a/ebnf.gemspec b/ebnf.gemspec index 37f24de..197c9e4 100755 --- a/ebnf.gemspec +++ b/ebnf.gemspec @@ -28,6 +28,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'scanf', '~> 1.0' gem.add_runtime_dependency 'rdf', '~> 3.1' # Required by sxp gem.add_development_dependency 'rdf-spec', '~> 3.1' + gem.add_development_dependency 'rdf-turtle', '~> 3.1' gem.add_development_dependency 'haml', '~> 5.0' gem.add_development_dependency 'rspec', '~> 3.9' gem.add_development_dependency 'rspec-its', '~> 1.3' diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 069b4a5..457bfab 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -42,9 +42,9 @@ [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - [16] RANGE ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + [16] RANGE ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' - [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' # Strings are unescaped Unicode, excepting control characters and hash (#) [18] STRING1 ::= '"' (CHAR - '"')* '"' diff --git a/etc/ebnf.html b/etc/ebnf.html index 55c7b11..86bb738 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -133,7 +133,7 @@ RANGE ::= -"[" (R_CHAR "-" R_CHAR) | (HEX - HEX) "]" +"[" (R_CHAR "-" R_CHAR) | (HEX "-" HEX) "]" @@ -141,7 +141,7 @@ O_RANGE ::= -"[^" (R_CHAR "-" R_CHAR) | (HEX - HEX) "]" +"[^" (R_CHAR "-" R_CHAR) | (HEX "-" HEX) "]" @@ -195,7 +195,7 @@ ([#x00-#x20] | ("#" - "#x" | "//") ([^#x0A#x0Dx])* | "/*" ("*" [^/])? | [^*]* "*/" | "(*" ("*" [^)])? 
| [^*]* "*)")+ - + @pass diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 257a40e..baa914f 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -1,6 +1,6 @@ ( (rule _empty "0" (first _eps) (seq)) - (pass (seq PASS)) + (pass _pass (seq PASS)) (rule ebnf "1" (start #t) (first "@pass" "@terminals" LHS _eps) @@ -158,8 +158,8 @@ (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) + (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) + (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 9aad833..226155f 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -51,12 +51,12 @@ module Meta EBNF::Rule.new(:_RANGE_1, "16.1", [:seq, "[", :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:alt, :_O_RANGE_1, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_1, "17.1", [:seq, "[^", :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_3, 
"17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :_O_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), @@ -92,7 +92,7 @@ module Meta EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(nil, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), ] end diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index c442ae5..5cb9433 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -1,5 +1,5 @@ ( - (pass (seq PASS)) + (pass _pass (seq PASS)) (rule ebnf "1" (star _ebnf_1)) (rule _ebnf_1 "1.1" (alt declaration rule)) (rule declaration "2" (alt "@terminals" pass)) @@ -50,12 +50,12 @@ (terminal _RANGE_1 "16.1" (seq "[" _RANGE_3)) (terminal _RANGE_2 "16.2" (seq _RANGE_4 "]")) (terminal _RANGE_3 "16.3" (seq R_CHAR "-" R_CHAR)) - (terminal _RANGE_4 "16.4" (diff HEX HEX)) + (terminal _RANGE_4 "16.4" (seq HEX "-" HEX)) (terminal O_RANGE "17" (alt _O_RANGE_1 _O_RANGE_2)) (terminal _O_RANGE_1 "17.1" (seq "[^" _O_RANGE_3)) (terminal _O_RANGE_2 "17.2" (seq _O_RANGE_4 "]")) (terminal _O_RANGE_3 "17.3" (seq R_CHAR "-" R_CHAR)) - (terminal _O_RANGE_4 "17.4" (diff HEX HEX)) 
+ (terminal _O_RANGE_4 "17.4" (seq HEX "-" HEX)) (terminal STRING1 "18" (seq "\"" _STRING1_1 "\"")) (terminal _STRING1_1 "18.1" (star _STRING1_2)) (terminal _STRING1_2 "18.2" (diff CHAR "\"")) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index d40c05c..a00edab 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -1,5 +1,5 @@ ( - (pass (seq PASS)) + (pass _pass (seq PASS)) (rule ebnf "1" (star (alt declaration rule))) (rule declaration "2" (alt "@terminals" pass)) (rule rule "3" (seq LHS expression)) @@ -16,8 +16,8 @@ (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) + (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) + (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index e1bbb56..eea9792 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -15,7 +15,7 @@ Output rules and terminals as S-Expressions, Turtle or EBNF This generates a S-Expression form of the grammar suitable for use by {EBNF} for generating a BNF representation (avoiding `star`, `plus`, and `opt` expressions), LL(1) first/follow comprehensions and branch tables used for parsing input files based on the grammar. 
( - (pass (seq PASS)) + (pass _pass (seq PASS)) (rule ebnf "1" (star (alt declaration rule))) (rule declaration "2" (alt "@terminals" pass)) (rule rule "3" (seq LHS expression)) diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index a4ae81a..7809225 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ -86,14 +86,14 @@ def inspect # Terminal for `RANGE` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. # - # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:RANGE, RANGE, unescape: true) do |prod, token, input| input[:terminal] = [:range, token.value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. # - # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:O_RANGE, O_RANGE, unescape: true) do |prod, token, input| input[:terminal] = [:range, token.value[1..-2]] end diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index 14954ab..0b83d18 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -15,7 +15,7 @@ Output rules and terminals as S-Expressions, Turtle or EBNF This generates a S-Expression form of the grammar suitable for use by {EBNF}. ( - (pass (seq PASS)) + (pass _pass (seq PASS)) (rule ebnf "1" (star (alt declaration rule))) (rule declaration "2" (alt "@terminals" pass)) (rule rule "3" (seq LHS expression)) diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 94606ec..06c9502 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -82,14 +82,14 @@ def inspect # Terminal for `RANGE` is matched as part of a `primary` rule. 
# - # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:RANGE, RANGE) do |value| [:range, value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. # - # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']' + # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:O_RANGE, O_RANGE) do |value| [:range, value[1..-2]] end diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index d16edd4..5566e5e 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -161,6 +161,36 @@ def initialize(input, format: :ebnf, **options) end end + ## + # Validate the grammar. + # + # Makes sure that rules reference either strings or other defined rules. + # + # @raise [RangeError] + def validate! + ast.each do |rule| + begin + rule.validate!(@ast) + rescue SyntaxError => e + error("In rule #{rule.sym}: #{e.message}") + end + end + raise SyntaxError, errors.join("\n") unless errors.empty? + end + + ## + # Is the grammar valid? + # + # Uses `#validate!` and catches `RangeError` + # + # @return [Boolean] + def valid? + validate! 
+ true + rescue SyntaxError + false + end + # Iterate over each rule or terminal, except empty # @param [:termina, :rule] kind # @yield rule @@ -210,30 +240,16 @@ def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options) end # Either output LL(1) BRANCH tables or rules for PEG parsing - if ast.first.is_a?(EBNF::PEG::Rule) - to_ruby_peg(output) - else + if ast.first.first to_ruby_ll1(output) + else + to_ruby_peg(output) end unless output == $stdout output.puts "end" end end - def dup - new_obj = super - new_obj.instance_variable_set(:@ast, @ast.dup) - new_obj - end - - ## - # Find a rule given a symbol - # @param [Symbol] sym - # @return [Rule] - def find_rule(sym) - (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym} - end - ## # Write out syntax tree as Turtle # @param [String] prefix for language @@ -242,6 +258,7 @@ def find_rule(sym) def to_ttl(prefix = nil, ns = "http://example.org/") unless ast.empty? [ + "@prefix dc: .", "@prefix rdf: .", "@prefix rdfs: .", ("@prefix #{prefix}: <#{ns}>." if prefix), @@ -257,6 +274,20 @@ def to_ttl(prefix = nil, ns = "http://example.org/") ast.sort.map(&:to_ttl).join("\n") end + def dup + new_obj = super + new_obj.instance_variable_set(:@ast, @ast.dup) + new_obj + end + + ## + # Find a rule given a symbol + # @param [Symbol] sym + # @return [Rule] + def find_rule(sym) + (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym} + end + def depth @depth += 1 ret = yield diff --git a/lib/ebnf/ll1.rb b/lib/ebnf/ll1.rb index 0265f1e..17ec82d 100644 --- a/lib/ebnf/ll1.rb +++ b/lib/ebnf/ll1.rb @@ -214,8 +214,9 @@ def first_follow(*starts) firsts, follows = 0, 0 # add Fi(wi) to Fi(Ai) for every rule Ai → wi # - # For sequences, this is the first rule in the sequence. - # For alts, this is every rule in the sequence + # * For sequences, this is the first rule in the sequence. 
+ # * For alts, this is every rule in the sequence + # * Other rules don't matter, as they don't appear in strict BNF each(:rule) do |ai| # Fi(a w' ) = { a } for every terminal a ai.terminals(ast).each do |t| diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index f88b94e..df3b805 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -208,7 +208,7 @@ def diff(s) return [[:diff, e1, e2], s] else error("diff", "Syntax Error") - raise "Syntax Error" + raise SyntaxError, "diff missing second operand" end end end @@ -297,10 +297,6 @@ def terminal(s) s.match(/([\w\.]+)(.*)$/) l, s = $1, $2 [l.to_sym, s] - when '@' # @pass or @terminals - s.match(/@(#\w+)(.*)$/) - l, s = $1, $2 - [[:"@", l], s] when '-' [[:diff], s[1..-1]] when '?' @@ -315,7 +311,7 @@ def terminal(s) [[m.to_sym], s[1..-1]] else error("terminal", "unrecognized terminal: #{s.inspect}") - raise "Syntax Error, unrecognized terminal: #{s.inspect}" + raise SyntaxError, "unrecognized terminal: #{s.inspect}" end end end diff --git a/lib/ebnf/peg.rb b/lib/ebnf/peg.rb index 6422f78..b94d71c 100644 --- a/lib/ebnf/peg.rb +++ b/lib/ebnf/peg.rb @@ -31,7 +31,7 @@ def make_peg def to_ruby_peg(output, **options) output.puts " RULES = [" ast.each do |rule| - output.puts " " + rule.to_ruby + '.extend(EBNF::PEG::Rule),' + output.puts " " + rule.to_ruby + (rule.is_a?(EBNF::PEG::Rule) ? '.extend(EBNF::PEG::Rule)' : '') + ',' end output.puts " ]" end diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index d6681a4..f0a2045 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -18,8 +18,8 @@ module Rule # # If matched, the input position is updated and the results returned in a Hash. # - # * `alt`: returns the value of the matched production or `:unmatched` - # * `diff`: returns the string value matched, or `:unmatched` + # * `alt`: returns the value of the matched production or `:unmatched`. + # * `diff`: returns the string value matched, or `:unmatched`. 
# * `hex`: returns a string composed of the matched hex character, or `:unmatched`. # * `opt`: returns the matched production, or `nil` if unmatched. # * `plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string. @@ -84,7 +84,8 @@ def parse(input) alt when :diff # matches any string that matches A but does not match B. - # XXX: Should this work for arbitrary rules? + # (Note, this is only used for Terminal rules, non-terminals will use :not) + raise "Diff used on non-terminal #{prod}" unless terminal? re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2])) matched = input.scan(re1) if !matched || re2.match?(matched) @@ -101,9 +102,9 @@ def parse(input) parser.update_furthest_failure(input.pos, input.lineno, expr.last) :unmatched end - when :opt - # Always matches - opt = case prod = expr[1] + when :not + # matches any string that does not match B. + res = case prod = expr[1] when Symbol rule = parser.find_rule(prod) raise "No rule found for #{prod}" unless rule @@ -111,34 +112,28 @@ def parse(input) when String input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched end - if opt == :unmatched + if res != :unmatched # Update furthest failure for terminals - parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal? - nil + parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal? + :unmatched else - opt + nil end + when :opt + # Result is the matched value or nil + opt = rept(input, 0, 1, expr[1]) + + # Update furthest failure for strings and terminals + parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? 
+ opt.first when :plus # Result is an array of all expressions while they match, # at least one must match - prod, plus = expr[1], [] - case prod - when Symbol - rule = parser.find_rule(prod) - raise "No rule found for #{prod}" unless rule - while (res = rule.parse(input)) != :unmatched - eat_whitespace(input) - plus << res - end - when String - while res = input.scan(Regexp.new(Regexp.quote(prod))) - eat_whitespace(input) - plus << res - end - end + plus = rept(input, 1, '*', expr[1]) + # Update furthest failure for strings and terminals - parser.update_furthest_failure(input.pos, input.lineno, prod) - plus.empty? ? :unmatched : (terminal? ? plus.compact.join("") : plus.compact) + parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? + plus.is_a?(Array) && terminal? ? plus.join("") : plus when :range # Matches the specified character range input.scan(to_regexp) || begin @@ -173,24 +168,11 @@ def parse(input) when :star # Result is an array of all expressions while they match, # an empty array of none match - prod, star = expr[1], [] - case prod - when Symbol - rule = parser.find_rule(prod) - raise "No rule found for #{prod}" unless rule - while (res = rule.parse(input)) != :unmatched - eat_whitespace(input) - star << res - end - when String - while res = input.scan(Regexp.new(Regexp.quote(prod))) - eat_whitespace(input) - star << res - end - end + star = rept(input, 0, '*', expr[1]) + # Update furthest failure for strings and terminals - parser.update_furthest_failure(input.pos, input.lineno, prod) - star.compact + parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? + star.is_a?(Array) && terminal? ? star.join("") : star else raise "attempt to parse unknown rule type: #{expr.first}" end @@ -208,6 +190,38 @@ def parse(input) return parser.packrat[sym][pos][:result] end + ## + # Repitition, 0-1, 0-n, 1-n, ... 
+ # + # Note, nil results are removed from the result, but count towards min/max calculations + # + # @param [Scanner] input + # @param [Integer] min + # @param [Integer] max + # If it is an integer, it stops matching after max entries. + # @param [Symbol, String] prod + # @return [:unmatched, Array] + def rept(input, min, max, prod) + result = [] + + case prod + when Symbol + rule = parser.find_rule(prod) + raise "No rule found for #{prod}" unless rule + while (res = rule.parse(input)) != :unmatched && (max == '*' || result.length < max) + eat_whitespace(input) + result << res + end + when String + while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max) + eat_whitespace(input) + result << res + end + end + + result.length < min ? :unmatched : result.compact + end + ## # Eat whitespace between non-terminal rules def eat_whitespace(input) diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index ef53c0c..c37f115 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -5,11 +5,11 @@ module EBNF class Rule # Operations which are flattened to seprate rules in to_bnf. BNF_OPS = %w{ - alt opt plus seq star + alt diff not opt plus rept seq star }.map(&:to_sym).freeze TERM_OPS = %w{ - diff hex range + hex range }.map(&:to_sym).freeze # Symbol of rule @@ -59,16 +59,23 @@ class Rule # Determines preparation and cleanup rules for reconstituting EBNF ? * + from BNF attr_accessor :cleanup - # @param [Symbol] sym - # @param [Integer] id + # @param [Symbol, nil] sym + # `nil` is allowed only for @pass + # @param [Integer, nil] id # @param [Array] expr - # @param [Symbol] kind (nil) + # @param [:rule, :terminal, :pass, ] kind (nil) # @param [String] ebnf (nil) + # When parsing, records the EBNF string used to create the rule. 
# @param [Array] first (nil) + # Recorded set of terminals that can proceed this rule (LL(1)) # @param [Array] follow (nil) + # Recorded set of terminals that can follow this rule (LL(1)) # @param [Boolean] start (nil) + # Is this the starting rule for the grammar? # @param [Rule] top_rule (nil) + # The top-most rule. All expressed rules are top-rules, derived rules have the original rule as their top-rule. # @param [Boolean] cleanup (nil) + # Records information useful for cleaning up converted :plus, and :star expansions (LL(1)). def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil) @sym, @id = sym, id @expr = expr.is_a?(Array) ? expr : [:seq, expr] @@ -79,21 +86,52 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta when !BNF_OPS.include?(@expr.first) then :terminal else :rule end + + # Allow @pass to not be named + @sym ||= :_pass if @kind == :pass + + raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol) + raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String) + raise ArgumentError, "Rule kind must be one of :rule, :terminal, or :pass, was #{@kind.inspect}" unless + @kind.is_a?(Symbol) && %w(rule terminal pass).map(&:to_sym).include?(@kind) + + case @expr.first + when :alt + raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1 + when :diff + raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3 + when :hex, :not, :opt, :plus, :range, :star + raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2 + when :rept + raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4 + raise ArgumentError, 
"#{@expr.first} operation must an non-negative integer minimum, was #{@expr[1]}" unless + @expr[1].is_a?(Integer) && @expr[1] >= 0 + raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless + @expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0 + when :seq + # It's legal to have a zero-lenght sequence + else + raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}" + end end ## # Return a rule from its SXP representation: # # @example inputs - # (pass (plus (range "#x20\\t\\r\\n"))) + # (pass _pass (plus (range "#x20\\t\\r\\n"))) # (rule ebnf "1" (star (alt declaration rule))) # (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]")) # # Also may have `(first ...)`, `(follow ...)`, or `(start #t)`. # - # @param [Array] sxp + # @param [String, Array] sxp # @return [Rule] def self.from_sxp(sxp) + if sxp.is_a?(String) + require 'sxp' unless defined?(SXP) + sxp = SXP.parse(sxp) + end expr = sxp.detect {|e| e.is_a?(Array) && ![:first, :follow, :start].include?(e.first.to_sym)} first = sxp.detect {|e| e.is_a?(Array) && e.first.to_sym == :first} first = first[1..-1] if first @@ -152,15 +190,16 @@ def to_sxp # @return [String] def to_ttl @ebnf.debug("to_ttl") {inspect} if @ebnf - comment = orig.to_s.strip. - gsub(/"""/, '\"\"\"'). - gsub("\\", "\\\\"). - sub(/^\"/, '\"'). - sub(/\"$/m, '\"') - statements = [ - %{:#{id} rdfs:label "#{id}"; rdf:value "#{sym}";}, - %{ rdfs:comment #{comment.inspect};}, - ] + statements = [%{:#{sym} rdfs:label "#{sym}";}] + if orig + comment = orig.to_s.strip. + gsub(/"""/, '\"\"\"'). + gsub("\\", "\\\\"). + sub(/^\"/, '\"'). + sub(/\"$/m, '\"') + statements << %{ rdfs:comment #{comment.inspect};} + end + statements << %{ dc:identifier "#{id}";} if id statements += ttl_expr(expr, terminal? ? 
"re" : "g", 1, false) "\n" + statements.join("\n") @@ -175,12 +214,13 @@ def to_ruby ## # Transform EBNF rule to BNF rules: # - # * Transform (rule a "n" (op1 (op2))) into two rules: - # (rule a "n" (op1 _a_1)) - # (rule _a_1 "n.1" (op2)) - # * Transform (rule a (opt b)) into (rule a (alt _empty b)) - # * Transform (rule a (star b)) into (rule a (alt _empty (seq b a))) - # * Transform (rule a (plus b)) into (rule a (seq b (star b) + # * Transform `(rule a "n" (op1 (op2)))` into two rules: + # + # (rule a "n" (op1 _a_1)) + # (rule _a_1 "n.1" (op2)) + # * Transform `(rule a (opt b))` into `(rule a (alt _empty b))` + # * Transform `(rule a (star b))` into `(rule a (alt _empty (seq b a)))` + # * Transform `(rule a (plus b))` into `(rule a (seq b (star b)` # # Transformation includes information used to re-construct non-transformed. # @@ -231,7 +271,7 @@ def to_bnf # Otherwise, no further transformation necessary new_rules << self elsif [:diff, :hex, :range].include?(expr.first) - # This rules are fine, the just need to be terminals + # This rules are fine, they just need to be terminals raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal? new_rules << self else @@ -245,9 +285,14 @@ def to_bnf ## # Transform EBNF rule for PEG: # - # * Transform (rule a "n" (op1 ... (op2 y) ...z)) into two rules: - # (rule a "n" (op1 ... _a_1 ... z)) - # (rule _a_1 "n.1" (op2 y)) + # * Transform `(rule a "n" (op1 ... (op2 y) ...z))` into two rules: + # + # (rule a "n" (op1 ... _a_1 ... 
z)) + # (rule _a_1 "n.1" (op2 y)) + # * Transform `(rule a "n" (diff op1 op2))` into two rules: + # + # (rule a "n" (seq _a_1 op1)) + # (rule _a_1 "n.1" (not op1)) # # @return [Array] def to_peg @@ -268,8 +313,14 @@ def to_peg # Return new rules after recursively applying #to_bnf new_rules = new_rules.map {|r| r.to_peg}.flatten - elsif [:diff, :hex, :range].include?(expr.first) - # This rules are fine, the just need to be terminals + elsif expr.first == :diff && !terminal? + this = dup + new_rule = build([:not, expr[2]]) + this.expr = [:seq, new_rule.sym, expr[1]] + new_rules << this + new_rules << new_rule + elsif [:hex, :range].include?(expr.first) + # This rules are fine, they just need to be terminals raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal? new_rules << self else @@ -294,45 +345,145 @@ def to_regexp end end - # Return the non-terminals for this rule. For seq, this is the first - # non-terminal in the sequence. For alt, this is every non-terminal in the alt. + # Is this a terminal? + # + # @return [Boolean] + def terminal? + kind == :terminal + end + + # Is this a pass? + # @return [Boolean] + def pass? + kind == :pass + end + + # Is this a rule? + # @return [Boolean] + def rule? + kind == :rule + end + + # Is this rule of the form (alt ...)? + def alt? + expr.is_a?(Array) && expr.first == :alt + end + + # Is this rule of the form (seq ...)? + def seq? + expr.is_a?(Array) && expr.first == :seq + end + + def inspect + "#" + end + + # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}. + # + # @param [Rule] other + # @return [Boolean] + def ==(other) + sym == other.sym && + kind == other.kind && + expr == other.expr + end + + # Two rules are equivalent if they have the same {#expr}. 
+ # + # @param [Rule] other + # @return [Boolean] + def eql?(other) + expr == other.expr + end + + # Rules compare using their ids + def <=>(other) + if id.to_i == other.id.to_i + id.to_s <=> other.id.to_s + else + id.to_i <=> other.id.to_i + end + end + + ## + # Utility function to translate code points of the form '#xN' into ruby unicode characters + def translate_codepoints(str) + str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)} + end + + # Return the non-terminals for this rule. + # + # * `alt` => this is every non-terminal. + # * `diff` => this is every non-terminal. + # * `hex` => nil + # * `not` => this is the last expression, if any. + # * `plus` => this is the last expression, if any. + # * `range` => nil + # * `rept` => this is the last expression, if any. + # * `seq` => this is the first expression in the sequence, if any. + # * `star` => this is the last expression, if any. # # @param [Array] ast # The set of rules, used to turn symbols into rules + # @param [Array] expr (@expr) + # The expression to check, defaults to the rule expression. + # Typically, if the expression is recursive, the embedded expression is called recursively. # @return [Array] - def non_terminals(ast) - @non_terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym| + # @note this is used for LL(1) tansformation, so rule types are limited + def non_terminals(ast, expr = @expr) + ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym| case sym when Symbol r = ast.detect {|r| r.sym == sym} r if r && r.rule? + when Array + non_terminals(ast, sym) else nil end - end.compact + end.flatten.compact.uniq end - # Return the terminals for this rule. For seq, this is the first - # terminals or strings in the seq. For alt, this is every non-terminal ni the alt. + # Return the terminals for this rule. + # + # * `alt` => this is every terminal. + # * `diff` => this is every terminal. + # * `hex` => nil + # * `not` => this is the last expression, if any. 
+ # * `plus` => this is the last expression, if any. + # * `range` => nil + # * `rept` => this is the last expression, if any. + # * `seq` => this is the first expression in the sequence, if any. + # * `star` => this is the last expression, if any. # # @param [Array] ast # The set of rules, used to turn symbols into rules + # @param [Array] expr (@expr) + # The expression to check, defaults to the rule expression. + # Typically, if the expression is recursive, the embedded expression is called recursively. # @return [Array] - def terminals(ast) - @terms ||= (alt? ? expr[1..-1] : expr[1,1]).map do |sym| + # @note this is used for LL(1) tansformation, so rule types are limited + def terminals(ast, expr = @expr) + ([:alt, :diff].include?(expr.first) ? expr[1..-1] : expr[1,1]).map do |sym| case sym when Symbol r = ast.detect {|r| r.sym == sym} r if r && r.terminal? when String sym - else - nil + when Array + terminals(ast, sym) end - end.compact + end.flatten.compact.uniq end - # Does this rule start with a sym? It does if expr is that sym, + ## + # The following are used for LL(1) transformation. + ## + + # Does this rule start with `sym`? It does if expr is that sym, # expr starts with alt and contains that sym, # or expr starts with seq and the next element is that sym. # @@ -349,6 +500,44 @@ def starts_with?(sym) end end + ## + # Validate the rule, with respect to an AST. + # + # @param [Array] ast + # The set of rules, used to turn symbols into rules + # @param [Array] expr (@expr) + # The expression to check, defaults to the rule expression. + # Typically, if the expression is recursive, the embedded expression is called recursively. + # @raise [RangeError] + def validate!(ast, expr = @expr) + ([:alt, :diff].include?(expr.first) ? 
expr[1..-1] : expr[1,1]).each do |sym| + case sym + when Symbol + r = ast.detect {|r| r.sym == sym} + raise SyntaxError, "No rule found for #{sym}" unless r + when Array + validate!(ast, sym) + else + nil + end + end.compact + end + + ## + # Validate the rule, with respect to an AST. + # + # Uses `#validate!` and catches `RangeError` + # + # @param [Array] ast + # The set of rules, used to turn symbols into rules + # @return [Boolean] + def valid?(ast) + validate!(ast) + true + rescue SyntaxError + false + end + # Do the firsts of this rule include the empty string? # # @return [Boolean] @@ -381,79 +570,6 @@ def add_follow(terminals) terminals.length end - # Is this a terminal? - # - # @return [Boolean] - def terminal? - kind == :terminal - end - - # Is this a pass? - # @return [Boolean] - def pass? - kind == :pass - end - - # Is this a rule? - # @return [Boolean] - def rule? - kind == :rule - end - - # Is this rule of the form (alt ...)? - def alt? - expr.is_a?(Array) && expr.first == :alt - end - - # Is this rule of the form (seq ...)? - def seq? - expr.is_a?(Array) && expr.first == :seq - end - - # Is this rule of the form (alt ...)? - def alt? - expr.is_a?(Array) && expr.first == :alt - end - - def inspect - "#" - end - - # Two rules are equal if they have the same {#sym}, {#kind} and {#expr}. - # - # @param [Rule] other - # @return [Boolean] - def ==(other) - sym == other.sym && - kind == other.kind && - expr == other.expr - end - - # Two rules are equivalent if they have the same {#expr}. 
- # - # @param [Rule] other - # @return [Boolean] - def equivalent?(other) - expr == other.expr - end - - # Rules compare using their ids - def <=>(other) - if id.to_i == other.id.to_i - id.to_s <=> other.id.to_s - else - id.to_i <=> other.id.to_i - end - end - - ## - # Utility function to translate code points of the form '#xN' into ruby unicode characters - def translate_codepoints(str) - str.gsub(/#x\h+/) {|c| c[2..-1].scanf("%x").first.chr(Encoding::UTF_8)} - end - private def ttl_expr(expr, pfx, depth, is_obj = true) indent = ' ' * depth @@ -469,13 +585,22 @@ def ttl_expr(expr, pfx, depth, is_obj = true) case op when :seq, :alt, :diff + # Multiple operands statements << %{#{indent}#{bra}#{pfx}:#{op} (} expr.each {|a| statements += ttl_expr(a, pfx, depth + 1)} statements << %{#{indent} )#{ket}} - when :opt, :plus, :star + when :opt, :plus, :star, :not + # Single operand statements << %{#{indent}#{bra}#{pfx}:#{op} } statements += ttl_expr(expr.first, pfx, depth + 1) statements << %{#{indent} #{ket}} unless ket.empty? + when :rept + # Three operands (min, max and expr) + statements << %{ #{indent}#{pfx}:min #{expr[0].inspect};} + statements << %{ #{indent}#{pfx}:max #{expr[1].inspect};} + statements << %{#{indent}#{bra}#{pfx}:#{op} } + statements += ttl_expr(expr.last, pfx, depth + 1) + statements << %{#{indent} #{ket}} unless ket.empty? when :_empty, :_eps statements << %{#{indent}"g:#{op.to_s[1..-1]}"} when :"'" diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 50d14d9..22d9175 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -3,9 +3,12 @@ require 'spec_helper' require 'ebnf' require 'sxp' +require 'rdf/turtle' describe EBNF::Base do - describe ".new" do + subject {PARSED_EBNF_GRAMMAR.dup} + + describe "#initialize" do { %{[2] Prolog ::= BaseDecl? 
PrefixDecl*} => %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}, @@ -60,12 +63,80 @@ expect(ast.to_sxp).to produce(expected, @debug) end end + + it "rejects unknown format" do + expect {parse("foo", format: :unknown)}.to raise_error "unknown input format :unknown" + end + end + + describe "#validate!" do + let(:simple) {EBNF.parse("a ::= b")} + it "notes invalid grammar" do + expect do + expect {simple.validate!}.to raise_error SyntaxError, "In rule a: No rule found for b" + end.to write(:something).to(:error) + end + + it "validates EBNF" do + expect {subject.validate!}.not_to raise_error + end + end + + describe "#valid?" do + let(:simple) {EBNF.parse("a ::= b")} + it "notes invalid grammar" do + expect do + expect(simple.valid?).to be_falsey + end.to write(:something).to(:error) + end + + it "validates EBNF" do + expect(subject).to be_valid + end + end + + describe "#each" do + it "yields each rule" do + rules = subject.ast.select {|r| r.rule?} + expect {|b| subject.each(:rule, &b)}.to yield_control.exactly(rules.length).times + end + it "yields each terminal" do + terminals = subject.ast.select {|r| r.terminal?} + expect {|b| subject.each(:terminal, &b)}.to yield_control.exactly(terminals.length).times + end + end + + describe "#to_sxp" do + specify {expect(subject.to_sxp).to include("(rule ebnf")} + end + + describe "#to_s" do + specify {expect(subject.to_s).to include("[1] ebnf")} + end + + describe "#to_html" do + specify {expect(subject.to_s).to include("[1] ebnf")} + end + + describe "#to_ruby" do + specify {expect {subject.to_ruby}.to write(:something).to(:output)} + end + + describe "#to_ttl" do + let(:reader) {RDF::Turtle::Reader.new(subject.to_ttl, base_uri: 'http://example.org/')} + specify {expect(reader).to be_valid} end describe "#dup" do specify {expect(parse(%{[2] Prolog ::= BaseDecl? 
PrefixDecl*}).dup).to be_a(EBNF::Base)} end + describe "#find_rule" do + it "finds ebnf" do + expect(subject.find_rule(:ebnf).sym).to eql :ebnf + end + end + def parse(value, **options) @debug = [] options = {debug: @debug}.merge(options) diff --git a/spec/ebnf_spec.rb b/spec/ebnf_spec.rb index 4fdfd77..c12cf75 100644 --- a/spec/ebnf_spec.rb +++ b/spec/ebnf_spec.rb @@ -59,7 +59,7 @@ end context "README" do - let(:ebnf) {EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))} + let(:ebnf) {PARSED_EBNF_GRAMMAR.dup} subject {ebnf} it "creates ast" do @@ -82,18 +82,24 @@ expect(subject.to_s).not_to be_empty end - context "BNF" do + context "LL1" do before {subject.make_bnf} - context "LL1" do - before do - subject.first_follow(:ebnf) - subject.build_tables - end + before do + subject.first_follow(:ebnf) + subject.build_tables + end + + it "#to_ruby" do + expect {subject.to_ruby}.to write(:something).to(:output) + end + end + + context "PEG" do + before {subject.make_peg} - it "#to_ruby" do - expect {subject.to_ruby}.to write(:something).to(:output) - end + it "#to_ruby" do + expect {subject.to_ruby}.to write(:something).to(:output) end end end diff --git a/spec/ll1/data/parser.rb b/spec/ll1/data/parser.rb index 6317492..86f55d2 100644 --- a/spec/ll1/data/parser.rb +++ b/spec/ll1/data/parser.rb @@ -61,6 +61,11 @@ class EBNFParser input[:terminal] = token.value end + production(:ebnf) do |input, current, callback| + # Cause method_missing to invoke something in our context + to_sxp + end + production(:declaration) do |input, current, callback| # current contains a declaration. 
# Invoke callback @@ -145,6 +150,8 @@ class EBNFParser end production(:_diff_1) do |input, current, callback| + # Gratuitous call to exercise method + add_prod_data(:_diff_1, "foo") input[:diff] ||= [:diff] # Add optimized value of `postfix`, if any @@ -152,6 +159,8 @@ class EBNFParser end production(:postfix) do |input, current, callback| + # Gratuitous call to exercise method + add_prod_datum(:postfix, "foo") # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively input[:postfix] = case current[:postfix] when "*" then [:star, current[:primary]] @@ -162,6 +171,8 @@ class EBNFParser end production(:primary) do |input, current, callback| + # Gratuitous call to exercise method + add_prod_datum(:primary, ["foo"]) input[:primary] = if current[:expression] v = current[:expression][1..-1] v = v.first if v.length == 1 diff --git a/spec/ll1/lexer_spec.rb b/spec/ll1/lexer_spec.rb index f0aa643..76354f9 100644 --- a/spec/ll1/lexer_spec.rb +++ b/spec/ll1/lexer_spec.rb @@ -196,46 +196,6 @@ end end - describe "#lineno" do - it "for white space" do - inputs = { - "" => 1, - "\n" => 2, - "\n\n" => 3, - "\r\n" => 2, - } - inputs.each do |input, lineno| - lexer = tokenize(input) - lexer.to_a # consumes the input - expect(lexer.lineno).to eq lineno - end - end - - context "STRING_LITERAL_LONG_QUOTE" do - it "tracks line numbers" do - input = %( - :Test a rdfs:Class ; - rdfs:subClassOf mf:ManifestEntry; - rdfs:label "Superclass of all CSVW tests" ; - rdfs:comment """ - All CSVW tests have an input file referenced using `mf:action`. Positive - and Negative Evaluation Tests also have a result file referenced using - `mf:result` . Other tests may take different inputs and options as defined - for each test class. - """ ; - :b :c . 
- ) - expect(tokenize(input).to_a.map(&:lineno)).to include( - 2, 2, 2, 2, - 3, 3, 3, - 4, 4, 4, - 5, 5, 10, - 11, 11, 11 - ) - end - end - end - it "matches input longer than low water mark when buffer is low" do input = StringIO.new %("""123456789 123456789 """ """123456789 123456789 """) lexer = EBNF::LL1::Lexer.new(input, terminals, @@ -246,68 +206,159 @@ expect(lexer.shift.type).to eq :STRING_LITERAL_LONG_QUOTE expect(lexer.shift.type).to eq :STRING_LITERAL_LONG_QUOTE end + end - context "yielding tokens" do - it "annotates tokens with the current line number" do - results = %w(1 2 3 4) - tokenize("1\n2\n3\n4").each_token do |token| - expect(token.type).to eq :INTEGER - expect(token.value).to eq results.shift - end + describe "#valid?" do + it "validates legal input" do + expect(tokenize(%q(:a "b" ))).to be_valid + end + + it "invalidates illegal input" do + expect(tokenize(%q(:a 'open))).not_to be_valid + end + end + + describe "#lineno" do + it "for white space" do + inputs = { + "" => 1, + "\n" => 2, + "\n\n" => 3, + "\r\n" => 2, + } + inputs.each do |input, lineno| + lexer = tokenize(input) + lexer.to_a # consumes the input + expect(lexer.lineno).to eq lineno end end - describe "#first/#shift/#recover" do - subject {tokenize("1\n2\n3\n4")} - it "returns tokens in first/shift sequence" do - %w{1 2 3 4}.each do |v| - expect(subject.first.value).to eq v - subject.shift - end - expect(subject.first).to be_nil + context "STRING_LITERAL_LONG_QUOTE" do + it "tracks line numbers" do + input = %( + :Test a rdfs:Class ; + rdfs:subClassOf mf:ManifestEntry; + rdfs:label "Superclass of all CSVW tests" ; + rdfs:comment """ + All CSVW tests have an input file referenced using `mf:action`. Positive + and Negative Evaluation Tests also have a result file referenced using + `mf:result` . Other tests may take different inputs and options as defined + for each test class. + """ ; + :b :c . 
+ ) + expect(tokenize(input).to_a.map(&:lineno)).to include( + 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, 10, + 11, 11, 11 + ) end + end + end - context "with unrecognized token" do - subject {tokenize("< space > 'foo' 1")} + describe "#first/#shift/#recover" do + subject {tokenize("1\n2\n3\n4")} + it "returns tokens in first/shift sequence" do + %w{1 2 3 4}.each do |v| + expect(subject.first.value).to eq v + subject.shift + end + expect(subject.first).to be_nil + end - it "raises error with #first" do - expect {subject.first}.to raise_error(EBNF::LL1::Lexer::Error, /Invalid token/) - end - - it "recovers to next token" do - subject.recover - expect(subject.first.value).to eq "'foo'" - end + context "with unrecognized token" do + subject {tokenize("< space > 'foo' 1")} + + it "raises error with #first" do + expect {subject.first}.to raise_error(EBNF::LL1::Lexer::Error, /Invalid token/) end + + it "recovers to next token" do + subject.recover + expect(subject.first.value).to eq "'foo'" + end + end - describe "#first" do - it "returns a token when passed as an argument" do - expect(subject.first(:INTEGER)).to be_a(EBNF::LL1::Lexer::Token) - end + describe "#first" do + it "returns a token when passed as an argument" do + expect(subject.first(:INTEGER)).to be_a(EBNF::LL1::Lexer::Token) + end - it "does not return a token unless passed as an argument" do - expect {subject.first(:Double)}.to raise_error(EBNF::LL1::Lexer::Error, 'Invalid token "1"') - end + it "does not return a token unless passed as an argument" do + expect {subject.first(:Double)}.to raise_error(EBNF::LL1::Lexer::Error, 'Invalid token "1"') end end + end - describe EBNF::LL1::Lexer::Terminal do - { - "returns itself with no map entry": { - input: "FOO", - map: {}, - expect: "FOO" - }, - "returns map value if specified": { - input: "FOO", - map: {"foo" => 'bar'}, - expect: "bar" - }, - }.each do |name, params| - it name do - term = described_class.new(:nil, params[:regexp], map: params[:map]) - 
expect(term.canonicalize(params[:input])).to eq params[:expect] - end + describe EBNF::LL1::Lexer::Token do + subject {described_class.new(:type, 'value', lineno: 1)} + + describe "#type" do + its(:type) {is_expected.to eq :type} + end + + describe "#value" do + its(:value) {is_expected.to eq 'value'} + end + + describe "#lineno" do + its(:lineno) {is_expected.to eq 1} + end + + describe "#[]" do + it "returns type at 0 index" do + expect(subject[0]).to eq :type + end + + it "returns value at 1 index" do + expect(subject[1]).to eq 'value' + end + + it "returns nil for other indexes" do + expect(subject[2]).to be_nil + end + end + + describe "#===" do + specify {expect(subject).to be === :type} + specify {expect(subject).to be === 'value'} + end + + describe "#to_hash" do + specify {expect(subject.to_hash).to eql({type: :type, value: 'value'})} + end + + describe "#to_s" do + specify {expect(subject.to_s).to eq ":type"} + end + + describe "#representation" do + specify {expect(subject.representation).to eq :type} + end + + describe "#to_a" do + specify {expect(subject.to_a).to eq [:type, 'value']} + end + end + + describe EBNF::LL1::Lexer::Terminal do + { + "returns itself with no map entry": { + input: "FOO", + map: {}, + expect: "FOO" + }, + "returns map value if specified": { + input: "FOO", + map: {"foo" => 'bar'}, + expect: "bar" + }, + }.each do |name, params| + it name do + term = described_class.new(:nil, params[:regexp], map: params[:map]) + expect(term.canonicalize(params[:input])).to eq params[:expect] end end end diff --git a/spec/ll1/parser_spec.rb b/spec/ll1/parser_spec.rb index b1cc445..3d85c0b 100644 --- a/spec/ll1/parser_spec.rb +++ b/spec/ll1/parser_spec.rb @@ -58,6 +58,12 @@ class LL1ParserTest subject.parse("foo", nil, branch: {a: {b: ["c"]}}) }.to raise_error(EBNF::LL1::Parser::Error, "Starting production not defined") end + + it "raises error on inalid input" do + expect { + subject.parse("bar", :foo, branch: {foo: {bar: ["baz"]}}) + }.to 
raise_error(EBNF::LL1::Parser::Error, /Invalid token "bar"/) + end end require_relative "data/parser" diff --git a/spec/ll1_spec.rb b/spec/ll1_spec.rb index 35356bd..94c979c 100644 --- a/spec/ll1_spec.rb +++ b/spec/ll1_spec.rb @@ -372,6 +372,11 @@ expect(false).to produce(true, @debug) }.to raise_error("Table creation failed with errors") expect(ebnf.errors.to_s).to match(expected) + + sio = StringIO.new + ebnf.to_ruby(sio) + sio.rewind + expect(sio.read).to match(/Note, grammar has errors/) end end end diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 1da0f06..0edd68b 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -67,7 +67,20 @@ end end end - + + describe "errors" do + { + %(a - '') => /diff missing second operand/, + %(%foo%) => /unrecognized terminal/, + }.each do |input, expected| + it "given #{input.inspect} raises #{expected}" do + expect do + expect {ebnf(:expression, input)}.to raise_error(SyntaxError, expected) + end.to write(:something).to(:error) + end + end + end + def ebnf(method, value, **options) @debug = [] options = {debug: @debug}.merge(options) diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index de7cb26..ac91ec0 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -52,6 +52,11 @@ class EBNFPegParser terminal(:POSTFIX, POSTFIX) + production(:ebnf) do |input| + # Cause method_missing to invoke something in our context + to_sxp + end + production(:declaration, clear_packrat: true) do |value, data, callback| # current contains a declaration. 
# Invoke callback diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb index 40e8aed..db0dab3 100644 --- a/spec/peg/rule_spec.rb +++ b/spec/peg/rule_spec.rb @@ -29,6 +29,16 @@ input: "C", expect: :unmatched }, + "(not A) with 'A'" => { + rule: [:not, "A"], + input: "A", + expect: :unmatched + }, + "(not A) with 'B'" => { + rule: [:not, "A"], + input: "B", + expect: nil + }, "(opt A) with 'A'" => { rule: [:opt, "A"], input: "A", diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 00c630f..d6c7115 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -7,69 +7,309 @@ describe EBNF::Rule do let(:debug) {[]} let(:ebnf) {EBNF.parse("", debug: debug)} - subject {EBNF::Rule.new("rule", "0", [], ebnf: ebnf)} + subject {EBNF::Rule.new(:rule, "0", [:seq, :foo], ebnf: ebnf)} - describe "#ttl_expr" do + describe ".from_sxp" do + context "accepts valid variations" do + { + "ebnf[1]": [ + %{(rule ebnf "1" (star (alt declaration rule)))}, + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]) + ], + "ebnf[1] parsed": [ + [:rule, :ebnf, "1", [:star, [:alt, :declaration, :rule]]], + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]], kind: :rule) + ], + "pass": [ + %{(pass _pass (plus (range "#x20\\\\t\\\\r\\\\n")))}, + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass) + ], + "terminal": [ + %{(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))}, + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]) + ], + "alt": [ + %{(rule alt (alt a b c))}, + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule) + ], + "diff": [ + %{(terminal R_CHAR "21" (diff CHAR "]"))}, + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal) + ], + "not": [ + %{(rule _a_1 "n.1" (not op1))}, + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule) + ], + "opt": [ + %{(rule _diff_1 "7.1" (opt _diff_2))}, + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule) + ], + "plus": [ + %{(rule seq "6" (plus diff))}, + 
EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule) + ], + "rept": [ + %{(rule rept "6" (rept 1 "*" diff))}, + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]) + ], + "rept m.n": [ + %{(rule rept "6" (rept 3 5 diff))}, + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]) + ], + "seq": [ + %{(rule seq (seq a b c))}, + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule) + ], + "star": [ + %{(rule _alt_1 "5.1" (star _alt_2))}, + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule) + ] + }.each do |title, (sxp, expected)| + it title do + res = EBNF::Rule.from_sxp(sxp) + expect(res).to eq expected + end + end + end + + context "rejects invalid variations" do + { + "alt (empty)": %{(rule alt (alt))}, + "diff (empty)": %{(terminal R_CHAR "21" (diff))}, + "diff (one)": %{(terminal R_CHAR "21" (diff CHAR))}, + "diff (three)": %{(terminal R_CHAR "21" (diff CHAR "]" ","))}, + "not (empty)": %{(rule _a_1 "n.1" (not))}, + "not (two)": %{(rule _a_1 "n.1" (not op1 op2))}, + "opt (empty)": %{(rule _diff_1 "7.1" (opt))}, + "plus (empty)": %{(rule seq "6" (plus))}, + "plus (two)": %{(rule seq "6" (plus diff extra))}, + "rept (empty)": %{(rule rept "6" (rept))}, + "rept (one)": %{(rule rept "6" (rept 1))}, + "rept (two)": %{(rule rept "6" (rept 1 "*"))}, + "rept (four)": %{(rule rept "6" (rept 1 "*" diff extra))}, + "rept (float min)": %{(rule rept "6" (rept 1.1 1 diff))}, + "rept (negative min)": %{(rule rept "6" (rept -1 1 diff))}, + "rept (float max)": %{(rule rept "6" (rept 1 1.1 diff))}, + "rept (negative max)": %{(rule rept "6" (rept 1 -1 diff))}, + "star (empty)": %{(rule _alt_1 "5.1" (star))}, + "star (two)": %{(rule _alt_1 "5.1" (star diff extra))}, + "not op": %{(rule _bad nil (_bad))} + }.each do |title, (sxp, expected)| + it title do + expect {EBNF::Rule.from_sxp(sxp)}.to raise_error(ArgumentError) + end + end + end + end + + describe "#to_sxp" do { - "ebnf[1]" => [ - [:star, [:alt, :declaration, :rule]], - %{g:star [ g:alt ( :declaration :rule ) ] .} + 
"ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + %{(rule ebnf "1" (star (alt declaration rule)))}, ], - "ebnf[2]" => [ - [:alt, "@terminals", "@pass"], - %{g:alt ( "@terminals" "@pass" ) .} + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + %{(pass _pass (plus (range "#x20\\\\t\\\\r\\\\n")))}, ], - "ebnf[5]" => [ - :alt, - %{g:seq ( :alt ) .} + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + %{(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))}, ], - "ebnf[9]" => [ - [:seq, :primary, [:opt, [:range, "?*+"]]], - %{g:seq ( :primary [ g:opt [ re:matches "[?*+]" ] ] ) .} + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + %{(rule alt (alt a b c))}, ], - "IRIREF" => [ - [:seq, "<", [:star, [:alt, [:range, "^#x00-#x20<>\"{}|^`\\"], :UCHAR]], ">"], - %{g:seq ( "<" [ g:star [ g:alt ( [ re:matches "[^\\\\u0000-\\\\u0020<>\\\"{}|^`\\\\]" ] :UCHAR ) ] ] ">" ) .} + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + %{(terminal R_CHAR "21" (diff CHAR "]"))}, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + %{(rule _a_1 "n.1" (not op1))}, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + %{(rule _diff_1 "7.1" (opt _diff_2))}, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + %{(rule seq "6" (plus diff))}, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + %{(rule rept "6" (rept 1 "*" diff))}, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + %{(rule rept "6" (rept 3 5 diff))}, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + %{(rule seq (seq a b c))}, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + %{(rule _alt_1 "5.1" (star _alt_2))}, ] - }.each do |title, (expr, expected)| + }.each do |title, (rule, sxp)| it title do 
- res = subject.send(:ttl_expr, expr, "g", 0, false) - res.each {|r| expect(r).to be_a(String)} + expect(rule.to_sxp).to eq sxp + end + end + end - expect(res.join("\n").gsub(/\s+/, ' ')).to produce(expected, debug) + describe "#to_ttl" do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + %{ + :ebnf rdfs:label "ebnf"; + dc:identifier "1"; + g:star + [ g:alt ( + :declaration + :rule + ) ] .}, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + %{ + :_pass rdfs:label "_pass"; + g:plus [ re:matches "[\\\\u0020\\\\t\\\\r\\\\n]" ] .}, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + %{ + :O_ENUM rdfs:label "O_ENUM"; + dc:identifier "17"; + re:seq ( "[^" [ re:plus :CHAR ] "]" ) .}, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + %{ + :alt rdfs:label "alt"; + g:alt ( :a :b :c ) .}, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + %{ + :R_CHAR rdfs:label "R_CHAR"; + dc:identifier "21"; + re:diff ( :CHAR "]" ) .}, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + %{ + :_a_1 rdfs:label "_a_1"; + dc:identifier "n.1"; + g:not :op1 .}, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + %{ + :_diff_1 rdfs:label "_diff_1"; + dc:identifier "7.1"; + g:opt :_diff_2 .}, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + %{ + :seq rdfs:label "seq"; + dc:identifier "6"; + g:plus :diff .}, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + %{ + :rept rdfs:label "rept"; + dc:identifier "6"; + g:min 1; + g:max "*"; + g:rept :diff .}, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + %{ + :rept rdfs:label "rept"; + dc:identifier "6"; + g:min 3; + g:max 5; + g:rept :diff .}, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + %{ + :seq 
rdfs:label "seq"; + g:seq ( :a :b :c ) .}, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + %{ + :_alt_1 rdfs:label "_alt_1"; + dc:identifier "5.1"; + g:star :_alt_2 .}, + ] + }.each do |title, (rule, ttl)| + it title do + expect(rule.to_ttl.gsub(/\s+/m, " ")).to eq ttl.gsub(/\s+/m, " ") end end end - - describe "#cclass" do + + describe "#to_ruby" do { - "passes normal stuff" => [ - %{^<>'{}|^`}, - %{[^<>'{}|^`]} + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + %{EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]])}, ], - "turns regular hex range into unicode range" => [ - %{#x0300-#x036F}, - %{[\\u0300-\\u036F]} + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + %{EBNF::Rule.new(:_pass, nil, [:plus, [:range, \"#x20\\\\t\\\\r\\\\n\"]], kind: :pass)}, ], - "turns short hex range into unicode range" => [ - %{#xC0-#xD6}, - %{[\\u00C0-\\u00D6]} + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + %{EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"], kind: :terminal)}, ], - "turns 3 char hex range into unicode range" => [ - %{#x370-#x37D}, - %{[\\u0370-\\u037D]} + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + %{EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c])}, ], - "turns long hex range into unicode range" => [ - %{#x000300-#x00036F}, - %{[\\U00000300-\\U0000036F]} + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + %{EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal)}, ], - "turns 5 char hex range into unicode range" => [ - %{#x00370-#x0037D}, - %{[\\U00000370-\\U0000037D]} + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + %{EBNF::Rule.new(:_a_1, "n.1", [:not, :op1])}, ], - }.each do |title, (input, expected)| + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + 
%{EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2])}, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + %{EBNF::Rule.new(:seq, "6", [:plus, :diff])}, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + %{EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff])}, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + %{EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff])}, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + %{EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c])}, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + %{EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2])}, + ] + }.each do |title, (rule, ruby)| it title do - expect(subject.send(:cclass, input)).to produce(expected, debug) + expect(rule.to_ruby).to eq ruby end end end @@ -77,13 +317,13 @@ describe "#to_bnf" do { "no-rewrite" => [ - [:seq], - [EBNF::Rule.new(:rule, "0", [:seq])] + [:seq, :foo], + [EBNF::Rule.new(:rule, "0", [:seq, :foo])] ], "embedded rule" => [ - [:seq, [:alt]], + [:seq, [:alt, :foo]], [EBNF::Rule.new(:rule, "0", [:seq, :_rule_1]), - EBNF::Rule.new(:_rule_1, "0.1", [:alt])] + EBNF::Rule.new(:_rule_1, "0.1", [:alt, :foo])] ], "opt rule" => [ [:opt, :foo], @@ -106,17 +346,18 @@ EBNF::Rule.new(:_rule_1, "0.1", [:alt, :_empty, :_rule_2]), EBNF::Rule.new(:_rule_2, "0.2", [:seq, :foo, :_rule_1])] ], - "diff rule" => [ - [:diff, "a", "b"], - [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"], kind: :terminal)] - ], + # Diff may be a Rule or a Terminal now. 
+ #"diff rule" => [ + # [:diff, "a", "b"], + # [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"])] + #], "hex rule" => [ [:hex, "#x00B7"], [EBNF::Rule.new(:rule, "0", [:hex, "#x00B7"], kind: :terminal)] ], "range rule" => [ - [:range, "a", "b"], - [EBNF::Rule.new(:rule, "0", [:range, "a", "b"], kind: :terminal)] + [:range, "a"], + [EBNF::Rule.new(:rule, "0", [:range, "a"], kind: :terminal)] ], "ebnf[1]" => [ [:star, [:alt, :declaration, :rule]], @@ -150,18 +391,31 @@ end end end + + context "exceptions" do + { + "diff" => [:diff, "foo", "foobar"], + "not" => [:not, "foo"], + "rept" => [:rept, 1, 2, "foo"], + }.each do |title, expr| + it title do + rule = EBNF::Rule.new(:rule, "0", expr) + expect {rule.to_bnf}.to raise_error(RuntimeError) + end + end + end end describe "#to_peg" do { "no-rewrite" => [ - [:seq], - [EBNF::Rule.new(:rule, "0", [:seq])] + [:seq, :foo], + [EBNF::Rule.new(:rule, "0", [:seq, :foo])] ], "embedded rule" => [ - [:seq, [:alt]], + [:seq, [:alt, :foo]], [EBNF::Rule.new(:rule, "0", [:seq, :_rule_1]), - EBNF::Rule.new(:_rule_1, "0.1", [:alt])] + EBNF::Rule.new(:_rule_1, "0.1", [:alt, :foo])] ], "opt rule" => [ [:opt, :foo], @@ -183,15 +437,16 @@ ], "diff rule" => [ [:diff, "a", "b"], - [EBNF::Rule.new(:rule, "0", [:diff, "a", "b"], kind: :terminal)] + [EBNF::Rule.new(:rule, "0", [:seq, :_rule_1, "a"]), + EBNF::Rule.new(:_rule_1, "0.1", [:not, "b"])] ], "hex rule" => [ [:hex, "#x00B7"], [EBNF::Rule.new(:rule, "0", [:hex, "#x00B7"], kind: :terminal)] ], "range rule" => [ - [:range, "a", "b"], - [EBNF::Rule.new(:rule, "0", [:range, "a", "b"], kind: :terminal)] + [:range, "a"], + [EBNF::Rule.new(:rule, "0", [:range, "a"], kind: :terminal)] ], "ebnf[1]" => [ [:star, [:alt, :declaration, :rule]], @@ -219,8 +474,467 @@ end it "extends with EBNF::PEG::Rule" do - rule = EBNF::Rule.new(:rule, "0", [:seq]).to_peg.first + rule = EBNF::Rule.new(:rule, "0", [:seq, :foo]).to_peg.first expect(rule).to be_a(EBNF::PEG::Rule) end end + + describe "#to_regexp" do + { + 
hex: ["#x20", / /], + range: ["a-b", /[a-b]/], + }.each do |title, (exp, regexp)| + it title do + expect(EBNF::Rule.new(title, nil, [title, exp]).to_regexp).to eql regexp + end + end + + it "raises an error for other operation" do + expect {EBNF::Rule.new(:seq, nil, [:seq, :a]).to_regexp}.to raise_error(/Can't turn/) + end + end + + describe "#terminal?" do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + false, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + false, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + true, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + false, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + true, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + false, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + false, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + false, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + false, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + false, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + false, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + false, + ] + }.each do |title, (rule, bool)| + it "#{title} => #{bool.inspect}" do + expect(rule.terminal?).to eq bool + end + end + end + + describe "#pass?" 
do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + false, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + true, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + false, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + false, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + false, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + false, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + false, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + false, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + false, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + false, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + false, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + false, + ] + }.each do |title, (rule, bool)| + it "#{title} => #{bool.inspect}" do + expect(rule.pass?).to eq bool + end + end + end + + describe "#rule?" 
do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + true, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + false, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + false, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + true, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + false, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + true, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + true, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + true, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + true, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + true, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + true, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + true, + ] + }.each do |title, (rule, bool)| + it "#{title} => #{bool.inspect}" do + expect(rule.rule?).to eq bool + end + end + end + + describe "#alt?" 
do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + false, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + false, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + false, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + true, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + false, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + false, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + false, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + false, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + false, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + false, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + false, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + false, + ] + }.each do |title, (rule, bool)| + it "#{title} => #{bool.inspect}" do + expect(rule.alt?).to eq bool + end + end + end + + describe "#seq?" 
do + { + "ebnf[1]": [ + EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]]), + false, + ], + "pass": [ + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), + false, + ], + "terminal": [ + EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), + true, + ], + "alt": [ + EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), + false, + ], + "diff": [ + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), + false, + ], + "not": [ + EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), + false, + ], + "opt": [ + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), + false, + ], + "plus": [ + EBNF::Rule.new(:seq, "6", [:plus, :diff], kind: :rule), + false, + ], + "rept": [ + EBNF::Rule.new(:rept, "6", [:rept, 1, "*", :diff]), + false, + ], + "rept m.n": [ + EBNF::Rule.new(:rept, "6", [:rept, 3, 5, :diff]), + false, + ], + "seq": [ + EBNF::Rule.new(:seq, nil, [:seq, :a, :b, :c], kind: :rule), + true, + ], + "star": [ + EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2], kind: :rule), + false, + ] + }.each do |title, (rule, bool)| + it "#{title} => #{bool.inspect}" do + expect(rule.seq?).to eq bool + end + end + end + + describe "#==" do + let(:rule1) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])} + let(:rule2) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])} + let(:rule3) {EBNF::Rule.new(:bar, nil, [:seq, "FOO"])} + + it "equals itself" do + expect(rule1).to eq(rule1) + end + it "equals an equivalent rule" do + expect(rule1).to eq(rule2) + end + it "does not equal a rule with a different symbol that has the same expression" do + expect(rule1).not_to eq(rule3) + end + end + + describe "#eql?" 
do + let(:rule1) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])} + let(:rule2) {EBNF::Rule.new(:foo, nil, [:seq, "FOO"])} + let(:rule3) {EBNF::Rule.new(:bar, nil, [:seq, "FOO"])} + + it "equals itself" do + expect(rule1).to eql(rule1) + end + it "equals an equivalent rule" do + expect(rule1).to eql(rule2) + end + it "equals a rule with a different symbol that has the same expression" do + expect(rule1).to eql(rule3) + end + end + + describe "#translate_codepoints" do + { + "#x20" => " ", + "#xffff" => "\u{ffff}" + }.each do |str, cp| + specify {expect(subject.translate_codepoints(str)).to eql(cp)} + end + end + + describe "#non_terminals" do + subject {EBNF.parse(File.read File.expand_path("../../etc/ebnf.ebnf", __FILE__))} + { + _pass: [], + ebnf: [:declaration, :rule], + declaration: [:pass], + alt: [:seq], + seq: [:diff], + diff: [:postfix], + postfix: [:primary], + primary: [], + pass: [], + LHS: [], + SYMBOL: [], + HEX: [], + ENUM: [], + O_ENUM: [], + RANGE: [], + O_RANGE: [], + STRING1: [], + STRING2: [], + CHAR: [], + R_CHAR: [], + POSTFIX: [], + PASS: [] + }.each do |sym, expected| + it "#{sym} => #{expected.inspect}" do + res = subject.ast.find {|r| r.sym == sym} + expect(res.non_terminals(subject.ast).map(&:sym)).to eq expected + end + end + end + + describe "#terminals" do + subject {EBNF.parse(File.read File.expand_path("../../etc/ebnf.ebnf", __FILE__))} + { + _pass: [:PASS], + ebnf: [], + declaration: ["@terminals"], + alt: [], + seq: [], + diff: [], + postfix: [], + primary: [:HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, "("], + pass: ["@pass"], + LHS: ["["], + SYMBOL: ["a-z", "A-Z", "0-9", "_", "."], + HEX: ["#x"], + ENUM: ["[", :HEX, :LHS], + O_ENUM: ["[^", :HEX], + RANGE: ["[", :HEX], + O_RANGE: ["[^", :HEX], + STRING1: ['"'], + STRING2: ["'"], + CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"], + R_CHAR: [:CHAR, "]"], + POSTFIX: ["?*+"], + PASS: ["#x00-#x20", "#", "#x", "//", "/*", "(*"] + }.each do |sym, 
expected| + it "#{sym} => #{expected.inspect}" do + res = subject.ast.find {|r| r.sym == sym} + expect(res.terminals(subject.ast).map {|r| r.is_a?(EBNF::Rule) ? r.sym : r}).to eq expected + end + end + end + + describe "#validate!" do + subject {EBNF.parse("a ::= b")} + it "notes missing rule" do + expect {subject.ast.first.validate!(subject.ast)}.to raise_error SyntaxError, "No rule found for b" + end + end + + describe "#valid?" do + subject {EBNF.parse("a ::= b")} + it "notes missing rule" do + expect(subject.ast.first.valid?(subject.ast)).to be_falsey + end + + it "validates EBNF" do + ebnf = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) + expect(ebnf.ast.first).to be_valid(ebnf.ast) + end + end + + describe "#cclass" do + { + "passes normal stuff" => [ + %{^<>'{}|^`}, + %{[^<>'{}|^`]} + ], + "turns regular hex range into unicode range" => [ + %{#x0300-#x036F}, + %{[\\u0300-\\u036F]} + ], + "turns short hex range into unicode range" => [ + %{#xC0-#xD6}, + %{[\\u00C0-\\u00D6]} + ], + "turns 3 char hex range into unicode range" => [ + %{#x370-#x37D}, + %{[\\u0370-\\u037D]} + ], + "turns long hex range into unicode range" => [ + %{#x000300-#x00036F}, + %{[\\U00000300-\\U0000036F]} + ], + "turns 5 char hex range into unicode range" => [ + %{#x00370-#x0037D}, + %{[\\U00000370-\\U0000037D]} + ], + }.each do |title, (input, expected)| + it title do + expect(subject.send(:cclass, input)).to produce(expected, debug) + end + end + end end \ No newline at end of file diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 2a2b309..bcad127 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -29,3 +29,7 @@ not_jruby: lambda { RUBY_PLATFORM.to_s != 'jruby'} } end + +require 'ebnf' + +PARSED_EBNF_GRAMMAR = EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))).freeze \ No newline at end of file diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 9b6e82f..9479d6e 100644 --- a/spec/writer_spec.rb +++ 
b/spec/writer_spec.rb @@ -22,6 +22,23 @@ end end + describe ".print" do + { + prolog: [ + %{[2] Prolog ::= BaseDecl? PrefixDecl*}, + %{[2] Prolog ::= BaseDecl? PrefixDecl*\n} + ], + }.each do |title, (grammar, plain)| + context title do + subject {EBNF::Base.new(grammar).ast} + + it "generates plain" do + expect {EBNF::Writer.print(*subject)}.to write(plain).to(:output) + end + end + end + end + context "Existing grammars" do { "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__), From 406e87a301eaf942fe1c27630c3357223583df44 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 26 Jun 2020 16:20:26 -0700 Subject: [PATCH 04/50] Added ISO EBNF (ISO/IEC 14977:1996) example, which parses files using that grammar. This will eventually be used to address issue #1 by creating native support for ISO/IEC 14977. --- README.md | 3 + examples/abnf/examples/postal-address.abnf | 20 + examples/ebnf-ll1-parser/README.md | 4 +- examples/ebnf-ll1-parser/parser.rb | 4 +- examples/ebnf-peg-parser/README.md | 4 +- examples/ebnf-peg-parser/Rakefile | 2 +- examples/ebnf-peg-parser/parser.rb | 19 +- examples/isoebnf/README.md | 133 +++ examples/isoebnf/Rakefile | 32 + examples/isoebnf/doc/layout.mustache | 491 ++++++++ examples/isoebnf/doc/parser.html | 1008 +++++++++++++++++ examples/isoebnf/examples/ebnf.isoebnf | 28 + examples/isoebnf/examples/html.isoebnf | 77 ++ examples/isoebnf/examples/iso-ebnf.isoebnf | 99 ++ examples/isoebnf/examples/pascall.isoebnf | 17 + .../isoebnf/examples/postal-address.isoebnf | 29 + examples/isoebnf/iso-ebnf.ebnf | 110 ++ examples/isoebnf/iso-ebnf.peg.sxp | 74 ++ examples/isoebnf/iso-ebnf.sxp | 61 + examples/isoebnf/meta.rb | 69 ++ examples/isoebnf/parse | 53 + examples/isoebnf/parser.rb | 239 ++++ lib/ebnf/terminals.rb | 2 +- 23 files changed, 2552 insertions(+), 26 deletions(-) create mode 100644 examples/abnf/examples/postal-address.abnf create mode 100644 examples/isoebnf/README.md create mode 100644 examples/isoebnf/Rakefile create 
mode 100644 examples/isoebnf/doc/layout.mustache create mode 100644 examples/isoebnf/doc/parser.html create mode 100644 examples/isoebnf/examples/ebnf.isoebnf create mode 100644 examples/isoebnf/examples/html.isoebnf create mode 100644 examples/isoebnf/examples/iso-ebnf.isoebnf create mode 100644 examples/isoebnf/examples/pascall.isoebnf create mode 100644 examples/isoebnf/examples/postal-address.isoebnf create mode 100644 examples/isoebnf/iso-ebnf.ebnf create mode 100644 examples/isoebnf/iso-ebnf.peg.sxp create mode 100644 examples/isoebnf/iso-ebnf.sxp create mode 100644 examples/isoebnf/meta.rb create mode 100755 examples/isoebnf/parse create mode 100644 examples/isoebnf/parser.rb diff --git a/README.md b/README.md index 2c1866a..9f03f99 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,8 @@ For an example parser built using this gem that parses the [EBNF][] grammar, see There is also an [EBNF LL(1) Parser example](https://dryruby.github.io/ebnf/examples/ebnf-peg-parser/doc/parser.html). +The [ISO EBNF Parser](https://dryruby.github.io/ebnf/examples/iso-ebnf/doc/parser.html) example parses [ISO/IEC 14977][] into S-Expressions, which can be used to parse compatible grammars using this parser (either PEG or LL(1)). + ## Acknowledgements Much of this work, particularly the generic parser, is inspired by work originally done by Tim Berners-Lee's Python [predictive parser](https://www.w3.org/2000/10/swap/grammar/predictiveParser.py). 
@@ -239,6 +241,7 @@ A copy of the [Turtle EBNF][] and derived parser files are included in the repos [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation [EBNF doc]: https://rubydoc.info/github/dryruby/ebnf [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table +[ISO/IEC 14977]:https://www.iso.org/standard/26153.html [LL(1)]: https://www.csd.uwo.ca/~moreno//CS447/Lectures/Syntax.html/node14.html [LL(1) Parser]: https://en.wikipedia.org/wiki/LL_parser [Logger]: https://ruby-doc.org/stdlib-2.4.0/libdoc/logger/rdoc/Logger.html diff --git a/examples/abnf/examples/postal-address.abnf b/examples/abnf/examples/postal-address.abnf new file mode 100644 index 0000000..4b1e969 --- /dev/null +++ b/examples/abnf/examples/postal-address.abnf @@ -0,0 +1,20 @@ +postal-address = name-part street zip-part + +name-part = *(personal-part SP) last-name [SP suffix] CRLF +name-part =/ personal-part CRLF + +personal-part = first-name / (initial ".") +first-name = *ALPHA +initial = ALPHA +last-name = *ALPHA +suffix = ("Jr." / "Sr." / 1*("I" / "V" / "X")) + +street = [apt SP] house-num SP street-name CRLF +apt = 1*4DIGIT +house-num = 1*8(DIGIT / ALPHA) +street-name = 1*VCHAR + +zip-part = town-name "," SP state 1*2SP zip-code CRLF +town-name = 1*(ALPHA / SP) +state = 2ALPHA +zip-code = 5DIGIT ["-" 4DIGIT] \ No newline at end of file diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index eea9792..655d930 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -2,11 +2,11 @@ This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract S-Expression which can be used to generate parser tables input grammars. Effectively, this is a re-implementation of {EBNF::Parser} itself. 
-## Parsing an LL(1) Grammar +## Parsing the Grammar require 'parser' - ebnf = EBNFLL1Parser.new(File.open(../../etc/ebnf.ebnf)) + ebnf = EBNFLL1Parser.new(File.open("../../etc/ebnf.ebnf")) Output rules and terminals as S-Expressions, Turtle or EBNF diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index 7809225..dd87f04 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ -272,7 +272,7 @@ def inspect # # [10] pass ::= '@pass' expression production(:pass) do |input, data, callback| - input[:pass] = data[:expression] + input[:pass] = data[:expression].to_ary end # ## Parser invocation. @@ -309,7 +309,7 @@ def initialize(input, **options, &block) # After parsing `@terminals` # This changes the state of the parser to treat subsequent rules as terminals. parsing_terminals = true - rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminal) + next when :pass # After parsing `@pass` # This defines a specific rule for whitespace. diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index 0b83d18..8e673c1 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -2,11 +2,11 @@ This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract S-Expression composed of sub-rules which can be directly executed by the parser. Effectively, this is a re-implementation of {EBNF::Parser} itself. 
-## Parsing an LL(1) Grammar +## Parsing the Grammar require 'ebnf' - ebnf = EBNFPegParser.new(File.open(../../etc/ebnf.ebnf)) + ebnf = EBNFPegParser.new(File.open("../../etc/ebnf.ebnf")) Output rules and terminals as S-Expressions, Turtle or EBNF diff --git a/examples/ebnf-peg-parser/Rakefile b/examples/ebnf-peg-parser/Rakefile index 016d8d9..c9f14eb 100644 --- a/examples/ebnf-peg-parser/Rakefile +++ b/examples/ebnf-peg-parser/Rakefile @@ -1,6 +1,6 @@ task default: [:meta, :doc] -desc 'Build first, follow and branch tables' +desc 'Build rules table' task meta: "meta.rb" file "meta.rb" => "../../etc/ebnf.ebnf" do |t| diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 06c9502..79374e7 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -12,23 +12,6 @@ class EBNFPegParser include EBNF::PEG::Parser include EBNF::Terminals - class ProdResult - attr_accessor :prod - attr_accessor :values - - def initialize(prod, *values) - @prod, @values = prod, values - end - - def to_ary - values.map {|v| v.respond_to?(:to_ary) ? v.to_ary : v}.unshift(@prod) - end - - def inspect - "(#{prod} #{values.map(&:inspect).join(' ')})" - end - end - # Abstract syntax tree from parse # # @return [Array] @@ -318,7 +301,7 @@ def initialize(input, **options, &block) # After parsing `@terminals` # This changes the state of the parser to treat subsequent rules as terminals. parsing_terminals = true - rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminal) + next when :pass # After parsing `@pass` # This defines a specific rule for whitespace. diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md new file mode 100644 index 0000000..9197188 --- /dev/null +++ b/examples/isoebnf/README.md @@ -0,0 +1,133 @@ +# ISO EBNF Parser example + +This example implements an [ISO/IEC 14977][] parser which parses compatible grammars into S-Expressions. 
This allows the resulting S-Expressions to drive a PEG Parser to parse documents defined using [ISO/IEC 14977][]. + +## Parsing the Grammar + + require 'ebnf' + + ebnf = ISOEBNFPegParser.new(File.open("examples/ebnf.isoebnf")) + +Output rules and terminals as S-Expressions: + + puts ebnf.to_sxp + +This generates an S-Expression form of the grammar suitable for use by {EBNF}. + + ( + (rule syntax (star syntax_rule)) + (rule syntax_rule + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (rule definitions_list + (seq single_definition (star (seq definition_separator_symbol definitions_list)))) + (rule single_definition (seq term (star (seq "," term)))) + (rule term (seq factor (opt (seq "-" exception)))) + (rule exception (seq factor)) + (rule factor (seq (opt (seq integer "*")) primary)) + (rule primary + (alt optional_sequence repeated_sequence special_sequence grouped_sequence + meta_identifier terminal_string empty )) + (rule optional_sequence + (seq start_option_symbol definitions_list end_option_symbol)) + (rule repeated_sequence + (seq start_repeat_symbol definitions_list end_repeat_symbol)) + (rule grouped_sequence (seq "(" definitions_list ")")) + (rule letter + (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" + "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" + "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" )) + (rule decimal_digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule integer (seq decimal_digit (star decimal_digit))) + (rule meta_identifier (seq letter (star meta_identifier_character))) + (rule meta_identifier_character (alt letter decimal_digit "_")) + (rule terminal_string + (alt + (seq (seq "'" first_terminal_character (star first_terminal_character) "'")) + (seq (seq "\"" second_terminal_character (star second_terminal_character) "\""))) ) + (rule first_terminal_character (seq terminal_character)) + (rule second_terminal_character (seq 
terminal_character)) + (rule special_sequence (seq "?" (star special_sequence_character) "?")) + (rule special_sequence_character (seq terminal_character)) + (rule terminal_character + (alt letter decimal_digit concatenate_symbol defining_symbol + definition_separator_symbol end_comment_symbol end_group_symbol + end_option_symbol end_repeat_symbol except_symbol first_quote_symbol + repetition_symbol second_quote_symbol special_sequence_symbol + start_comment_symbol start_group_symbol start_option_symbol + start_repeat_symbol terminator_symbol other_character )) + (rule other_character + (alt " " ":" "+" "_" "%" "@" "&" "#" "$" "<" ">" "\\" "^" "`" "~")) + (rule empty (seq "")) + (rule defining_symbol (alt "=" ":")) + (rule definition_separator_symbol (alt "|" "/" "!")) + (rule terminator_symbol (alt ";" ".")) + (rule start_option_symbol (alt "[" "(/")) + (rule end_option_symbol (alt "]" "/)")) + (rule start_repeat_symbol (alt "{" "(:")) + (rule end_repeat_symbol (alt "}" ":)"))) + +Note, however, that ISO EBNF doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the {file:iso-ebnf.ebnf W3C EBNF definition of the grammar} does use terminal rules. + +This can then be used as input to {EBNF.parse} to transform EBNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. + + ebnf --input-format sxp --peg ebnf.sxp -o ebnf.peg.sxp + +## Example Walkthrough + +This example uses the EBNF grammar from {file:iso-ebnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar. + +The first step is defining regular expressions for terminals used within the grammar. Note that the parser can operate without terminal definitions, but this can greatly improve parser performance. + +The {file:parser} is implemented using the {ISOEBNFPegParser} class, which includes {EBNF::PEG::Parser}. 
+ +### Parser basics +The parser operates directly using the rules from the abstract syntax tree generated by turning the original EBNF grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). + +The parser starts with the specified rule, `ebnf` in this case, and executes that rule, which is expected to completely parse the input file potentially leaving some whitespace. + +Non-terminal rules have an expression using one of the following: + +`seq` +: A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched. +`opt` +: An optional rule or terminal. It either results in the matching rule or returns `nil`. +`alt` +: A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. +`plus` +: A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input. +`rept m n` +: A sequence of at least `m` and at most `n` of the matching rule. It will always return an array. +`star` +: A sequence of zero or more of the matching rule. It will always return an array. + +The starting rule will typically be of the form `(star sub_rule)` which will attempt to parse that sub rule until the end of input. + +If a rule matches, it enters a _production_, which may invoke a _start production_ before matching is attempted, and will call any _production_ either if matched, or unmatched. 
That _production_ may choose to evaluate the returned abstract syntax tree to simplify the result, or create some semantic representation of that value. + +Due to the nature of [PEG][] parsers, the same rule may be attempted at the same input location many times; this is optimized by use of a [Packrat][] memoizing cache, which remembers the result of a previous successful evaluation and short-circuits further execution. + +Processing continues by continuing to look for productions sequence and pushing those productions onto the stack. When a production is complete, any associated _production handler_ is invoked, after popping off the top of the `prod_data` stack. The just removed hash is passed as `current` to the _production handler_. This is typically where the work of the parser happens. See [Production definitions](#Production_definitions) for more information. + +### Terminal definitions +The {file:parser} uses a DSL to specify `terminals` and `productions` associated with rules in the grammar. Each `terminal` specifies the rule name, associated regular expression, and a block which is invoked when the parser recognizes the terminal: + + terminal(:integer, /\d+/) do |value, prod| + value.to_i + end + +In this terminal definition, the `integer` terminal is recognized using the `/\d+/`. When found, the value of the integer is returned for use by productions which include it. 
+ +### Production definitions +Looking at the grammar itself, we can see that the first declaration is + + [1] syntax ::= syntax_rule* + +[Ruby]: https://ruby-lang.org/ +[YARD]: https://yardoc.org/ +[YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md +[PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html +[EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation +[EBNF doc]: https://rubydoc.info/github/dryruby/ebnf/ +[Turtle gem]: https://rubygems.org/gems/rdf-turtle +[Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/ +[ISO/IEC 14977]:https://www.iso.org/standard/26153.html \ No newline at end of file diff --git a/examples/isoebnf/Rakefile b/examples/isoebnf/Rakefile new file mode 100644 index 0000000..764d441 --- /dev/null +++ b/examples/isoebnf/Rakefile @@ -0,0 +1,32 @@ +task default: ['iso-ebnf.sxp', 'iso-ebnf.peg.sxp', :meta, :doc] + +desc 'Build rules table' +task meta: "meta.rb" + +file "meta.rb" => "iso-ebnf.ebnf" do |t| + sh %{ + ebnf --peg --format rb \ + --mod-name ISOEBNFMeta \ + --output meta.rb \ + #{t.prerequisites.first} + } +end + +file 'iso-ebnf.sxp' => "iso-ebnf.ebnf" do |t| + sh %{ + ebnf --output iso-ebnf.sxp #{t.prerequisites.first} + } +end + +file 'iso-ebnf.peg.sxp' => "iso-ebnf.ebnf" do |t| + sh %{ + ebnf --peg --output iso-ebnf.peg.sxp #{t.prerequisites.first} + } +end + +desc "Generate literal documentation for parser" +task doc: %w(doc/parser.html) + +file "doc/parser.html" => "parser.rb" do + `rocco -t doc/layout.mustache parser.rb -o doc` +end diff --git a/examples/isoebnf/doc/layout.mustache b/examples/isoebnf/doc/layout.mustache new file mode 100644 index 0000000..c62137d --- /dev/null +++ b/examples/isoebnf/doc/layout.mustache @@ -0,0 +1,491 @@ + + + + + {{ title }} + + + +
+
+ {{#sources?}} +
+ Jump To … +
+
+ {{#sources}} + {{ basename }} + {{/sources}} +
+
+
+ {{/sources?}} + + + + + + + + + {{#sections}} + + + + + {{/sections}} +

{{ title }}

+
+ +
+ {{{ docs }}} +
+
{{{ code }}}
+
+
+ diff --git a/examples/isoebnf/doc/parser.html b/examples/isoebnf/doc/parser.html new file mode 100644 index 0000000..4102d1a --- /dev/null +++ b/examples/isoebnf/doc/parser.html @@ -0,0 +1,1008 @@ + + + + + parser.rb + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

parser.rb

+
+ +
+

EBNF Parser for EBNF.

+ +

Produces an Abstract Syntax Tree in S-Expression form for the input grammar file

+
+
require 'ebnf'
+require 'ebnf/terminals'
+require 'ebnf/peg/parser'
+require 'meta'
+require 'sxp'
+require 'logger'
+
+class ISOEBNFPegParser
+  include EBNF::PEG::Parser
+
+
+ +
+

The base for terminal-character, which omits "'", '"', and '?'. +Could be more optimized, and one might quibble +with the overly-strictly defined character set, +but it is correct.

+
+
  TERMINAL_CHARACTER_BASE = %r{
+    [a-zA-Z0-9] | # letter | decimal digit
+    ,           | # concatenate symbol
+    =           | # defining symbol
+    [\|\/!]     | # definition separator symbol
+    \*\)        | # end comment symbol
+    \)          | # end group symbol
+    \]          | # end option symbol
+    \}          | # end repeat symbol
+    \-          | # except symbol
+
+# DIVIDER
+
+    \*          | # repetition symbol
+
+# DIVIDER
+
+    \(\*        | # start comment symbol
+    \(          | # start group symbol
+    \[          | # start option symbol
+    \{          | # start repeat symbol
+    [;\.]       | # terminator symbol
+    [:+_%@&$<>^\x20\x23\\]  # other character
+  }x
+
+  TERMINAL_CHARACTER         = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
+  FIRST_TERMINAL_CHARACTER   = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
+  SECOND_TERMINAL_CHARACTER  = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
+  SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
+
+
+ +
+

\' | # first quote symbol

+
+
  attr_reader :ast
+
+
+ +
+

\" | # second quote symbol +\? | # special sequence symbol

+
+
  terminal(:integer, /\d+/) do |value, prod|
+    value.to_i
+  end
+
+
+ +
+

Abstract syntax tree from parse

+ +

@return [ArrayEBNF::Rule]

+
+
  terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
+    value.to_sym
+  end
+
+
+ +
+

[14] integer ::= decimal_digit+

+
+
  terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
+    value[1..-2]
+  end
+
+
+ +
+

[15] meta_identifier ::= letter meta_identifier_character*

+
+
  terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
+
+
+ +
+

[17] terminal_string ::= ("'" first_terminal_character+ "'") +| ('"' second_terminal_character+ '"')

+
+
  terminal(:terminal_character, TERMINAL_CHARACTER)
+
+
+ +
+

[20] special_sequence ::= '?' special_sequence_character* '?'

+
+
  terminal(:empty, //)
+
+
+# DIVIDER
+
+  terminal(:definition_separator_symbol, /[\|\/!]/)
+
+
+ +
+

[22] terminal_character ::= [a-zA-Z0-9] +| [,=;*}#x2d?([{;] +| '*)' +| '(*' +| ']' +| other_character

+
+
  terminal(:terminator_symbol, /[;\.]/)
+
+
+ +
+

[25] empty ::= ''

+
+
  terminal(:start_option_symbol, /\[|\(\//)
+
+
+ +
+

[26] definition_separator_symbol ::= '|' | '/' | '!'

+
+
  terminal(:end_option_symbol, /\]|\/\)/) # ']' or the two-character '/)'; a bare '/' is a definition separator
+
+
+ +
+

[27] terminator_symbol ::= ';' | '.'

+
+
  terminal(:start_repeat_symbol, /{|\(:/)
+
+
+ +
+

[28] start_option_symbol ::= '[' | '(/'

+
+
  terminal(:end_repeat_symbol, /}|:\)/)
+
+
+ +
+

[29] end_option_symbol ::= ']' | '/)'

+
+
+
+
+ +
+

[30] start_repeat_symbol ::= '{' | '(:'

+
+
  production(:syntax_rule, clear_packrat: true) do |value, data, callback|
+
+
+ +
+

[31] end_repeat_symbol ::= '}' | ':)'

+
+
    sym = value[0][:meta_identifier]
+    definitions_list = value[2][:definitions_list]
+    callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
+    nil
+  end
+
+
+ +
+

Non-terminal productions

+
+
  production(:definitions_list) do |value|
+    if value.last[:_definitions_list_1].length > 0
+      [:alt, value.first[:single_definition]] + value.last[:_definitions_list_1]
+    else
+      value.first[:single_definition]
+    end
+  end
+  production(:_definitions_list_1) do |value|
+    Array(value.first)
+  end
+  production(:_definitions_list_2) do |value|
+    if Array(value.last[:definitions_list]).first == :alt
+      value.last[:definitions_list][1..-1]
+    else
+      [value.last[:definitions_list]]
+    end
+  end
+
+
+ +
+

[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol

+
+
  production(:single_definition) do |value|
+    if value.last[:_single_definition_1].length > 0
+      [:seq, value.first[:term]] + value.last[:_single_definition_1]
+    else
+      value.first[:term]
+    end
+  end
+  production(:_single_definition_1) do |value|
+    value.map {|a1| a1.last[:term]}.compact # Get rid of '|'
+  end
+
+
+ +
+

value contains an expression. +Invoke callback

+
+
  production(:term) do |value|
+    if value.last[:_diff_1]
+      [:diff, value.first[:postfix], value.last[:_term_1]]
+    else
+      value.first[:factor]
+    end
+  end
+  production(:_term_1) do |value|
+    value.last[:exception] if value
+  end
+
+
+ +
+

[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*

+
+
  production(:exception) do |value|
+    value.first[:factor]
+  end
+
+
+ +
+

[4] single_definition ::= term (',' term)*

+
+
  production(:factor) do |value|
+    if value.first[:_factor_1]
+      [:rept, value.first[:_factor_1], value.first[:_factor_1], value.last[:primary]]
+    else
+      value.last[:primary]
+    end
+  end
+  production(:_factor_2) do |value|
+    value.first[:integer]
+  end
+
+
+ +
+

[5] term ::= factor ('-' exception)?

+
+
  production(:primary) do |value|
+    value
+  end
+
+
+ +
+

[6] exception ::= factor

+
+
  production(:optional_sequence) do |value|
+    [:opt, value[1][:definitions_list]]
+  end
+
+
+ +
+

[7] factor ::= (integer '*')? primary

+
+
  production(:repeated_sequence) do |value|
+    [:star, value[1][:definitions_list]]
+  end
+
+
+ +
+

[8] primary ::= optional_sequence +| repeated_sequence +| special_sequence +| grouped_sequence +| meta_identifier +| terminal_string +| empty

+
+
  production(:grouped_sequence) do |value|
+    [:seq, value[1][:definitions_list]]
+  end
+
+
+ +
+

[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol

+
+
  def initialize(input, **options, &block)
+
+
+ +
+

[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol

+
+
    if options.has_key?(:level)
+      options[:logger] = Logger.new(STDERR)
+      options[:logger].level = options[:level]
+      options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+    end
+
+
+ +
+

[11] grouped_sequence ::= '(' definitions_list ')'

+
+
    @input = input.respond_to?(:read) ? input.read : input.to_s
+
+    parsing_terminals = false
+    @ast = []
+    parse(@input, :syntax, ISOEBNFMeta::RULES,
+                           whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
+                           **options
+    ) do |context, *data|
+      rule = case context
+      when :rule
+
+
+ +
+

Parser invocation.

+ +

On start, yield ourselves if a block is given, otherwise, return this parser instance

+ +

@param [#read, #to_s] input +@param [Hash{Symbol => Object}] options +@option options [Integer] :level + Trace level. 0(debug), 1(info), 2(warn), 3(error). +@return [ISOEBNFPegParser]

+
+
        rule = data.first
+        rule.kind = :terminal if parsing_terminals
+        rule
+      end
+      @ast << rule if rule
+    end
+    @ast
+  end
+
+
+ +
+

If the level option is set, instantiate a logger for collecting trace information.

+
+
  def to_sxp
+    require 'sxp' unless defined?(SXP)
+
+
+ +
+

Read input, if necessary, which will be used in a Scanner.

+
+
    SXP::Generator.string(@ast.map(&:for_sxp))
+  end
+end
+
+
+ +
+

A rule which has already been turned into a Rule object.

+
+
+
+
+ +
+

Output formatted S-Expression of grammar

+
+
+
+
+ +
+

Output rules as a formatted S-Expression

+ +
+
+
+
+ diff --git a/examples/isoebnf/examples/ebnf.isoebnf b/examples/isoebnf/examples/ebnf.isoebnf new file mode 100644 index 0000000..06e2fa3 --- /dev/null +++ b/examples/isoebnf/examples/ebnf.isoebnf @@ -0,0 +1,28 @@ +letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" + | "c" | "d" | "e" | "f" | "g" | "h" | "i" + | "j" | "k" | "l" | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" | "u" | "v" | "w" + | "x" | "y" | "z" ; +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" + | "'" | '"' | "=" | "|" | "." | "," | ";" ; +character = letter | digit | symbol | "_" ; + +identifier = letter , { letter | digit | "_" } ; +terminal = "'" , character , { character } , "'" + | '"' , character , { character } , '"' ; + +lhs = identifier ; +rhs = identifier + | terminal + | "[" , rhs , "]" + | "{" , rhs , "}" + | "(" , rhs , ")" + | rhs , "|" , rhs + | rhs , "," , rhs ; + +rule = lhs , "=" , rhs , ";" ; +grammar = { rule } ; diff --git a/examples/isoebnf/examples/html.isoebnf b/examples/isoebnf/examples/html.isoebnf new file mode 100644 index 0000000..035d0b3 --- /dev/null +++ b/examples/isoebnf/examples/html.isoebnf @@ -0,0 +1,77 @@ +(* from https://tomassetti.me/ebnf/ *) +htmlDocument + = {scriptlet | SEA_WS}, [xml], {scriptlet | SEA_WS}, [dtd], {scriptlet | SEA_WS}, {htmlElements} + ; + +htmlElements + : {htmlMisc}, htmlElement, {htmlMisc} + ; + +htmlElement + : TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE, htmlContent, TAG_OPEN, TAG_SLASH, htmlTagName, TAG_CLOSE + | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_SLASH_CLOSE + | TAG_OPEN, htmlTagName, {htmlAttribute}, TAG_CLOSE + | scriptlet + | script + | style + ; + +htmlContent + : [htmlChardata], {(htmlElement | xhtmlCDATA | htmlComment), [htmlChardata]} + ; + +htmlAttribute + : htmlAttributeName, TAG_EQUALS, 
htmlAttributeValue + | htmlAttributeName + ; + +htmlAttributeName + : TAG_NAME + ; + +htmlAttributeValue + : ATTVALUE_VALUE + ; + +htmlTagName + : TAG_NAME + ; + +htmlChardata + : HTML_TEXT + | SEA_WS + ; + +htmlMisc + : htmlComment + | SEA_WS + ; + +htmlComment + : HTML_COMMENT + | HTML_CONDITIONAL_COMMENT + ; + +xhtmlCDATA + : CDATA + ; + +dtd + : DTD + ; + +xml + : XML_DECLARATION + ; + +scriptlet + : SCRIPTLET + ; + +script + : SCRIPT_OPEN, ( SCRIPT_BODY | SCRIPT_SHORT_BODY) + ; + +style + : STYLE_OPEN, ( STYLE_BODY | STYLE_SHORT_BODY) + ; \ No newline at end of file diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf new file mode 100644 index 0000000..7ae2aa3 --- /dev/null +++ b/examples/isoebnf/examples/iso-ebnf.isoebnf @@ -0,0 +1,99 @@ +(* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *) +(* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *) + +syntax = {syntax_rule} ; + +syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol ; + +definitions_list = single_definition, {definition_separator_symbol, definitions_list} ; + +single_definition = term, {',', term} ; + +term = factor, ['-', exception] ; + +exception = factor ; + +factor = [integer, '*'], primary ; + +primary = optional_sequence + | repeated_sequence + | special_sequence + | grouped_sequence + | meta_identifier + | terminal_string + | empty + ; + +optional_sequence = start_option_symbol, definitions_list, end_option_symbol ; + +repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol ; + +grouped_sequence = '(', definitions_list, ')' ; + +letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" + | "c" | "d" | "e" | "f" | "g" | "h" | "i" + | "j" | "k" | "l" | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" | "u" | "v" | "w" + | "x" | "y" | "z" + ; + +decimal_digit = "0" | "1" | 
"2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +integer = decimal_digit, {decimal_digit} ; + +meta_identifier = letter, {meta_identifier_character} ; + +(* Extended to allow '_' *) +meta_identifier_character = letter | decimal_digit | '_' ; + +terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") + | ('"', second_terminal_character, {second_terminal_character}, '"') + ; + +first_terminal_character = terminal_character - "'" ; + +second_terminal_character = terminal_character - '"' ; + +special_sequence = '?', {special_sequence_character}, '?' ; + +special_sequence_character = terminal_character - '?' ; + +terminal_character = letter + | decimal_digit + | concatenate_symbol + | defining_symbol + | definition_separator_symbol + | end_comment_symbol + | end_group_symbol + | end_option_symbol + | end_repeat_symbol + | except_symbol + | first_quote_symbol + | repetition_symbol + | second_quote_symbol + | special_sequence_symbol + | start_comment_symbol + | start_group_symbol + | start_option_symbol + | start_repeat_symbol + | terminator_symbol + | other_character + ; + +other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&' + | '#' | '$' | '<' | '>' | '\' | '^' | '`' + | '~' ; + +empty = ; + +(* Simple terminals that are often extended *) +defining_symbol = '=' | ':' ; +definition_separator_symbol = '|' | '/' | '!' ; +terminator_symbol = ';' | '.' ; +start_option_symbol = '[' | '(/' ; +end_option_symbol = ']' | '/)' ; +start_repeat_symbol = '{' | '(:' ; +end_repeat_symbol = '}' | ':)' ; diff --git a/examples/isoebnf/examples/pascall.isoebnf b/examples/isoebnf/examples/pascall.isoebnf new file mode 100644 index 0000000..acd114b --- /dev/null +++ b/examples/isoebnf/examples/pascall.isoebnf @@ -0,0 +1,17 @@ + (* a simple program syntax in EBNF − Wikipedia *) + program = 'PROGRAM', white_space, identifier, white_space, + 'BEGIN', white_space, + { assignment, ";", white_space }, + 'END.' 
; + identifier = alphabetic_character, { alphabetic_character | digit } ; + number = [ "-" ], digit, { digit } ; + string = '"' , { all_characters - '"' }, '"' ; + assignment = identifier , ":=" , ( number | identifier | string ) ; + alphabetic_character = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" ; + digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + white_space = ? white_space characters ? ; + all_characters = ? all visible characters ? ; + \ No newline at end of file diff --git a/examples/isoebnf/examples/postal-address.isoebnf b/examples/isoebnf/examples/postal-address.isoebnf new file mode 100644 index 0000000..2c36d5b --- /dev/null +++ b/examples/isoebnf/examples/postal-address.isoebnf @@ -0,0 +1,29 @@ +postal_address = name_part, street, zip_part ; + +name_part = {personal_part, SP}, last_name, [SP, suffix], CRLF + | personal_part, CRLF + ; + +personal_part = first_name | (initial, ".") ; +first_name = {ALPHA} ; +initial = ALPHA ; +last_name = {ALPHA} ; +suffix = ("Jr." | "Sr." 
| ("I" | "V" | "X"), {"I" | "V" | "X"}) ; + +street = [apt, SP], house_num, SP, street_name, CRLF ; +apt = DIGIT, [DIGIT, [DIGIT, [DIGIT]]] ; +house_num = (DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA), + [(DIGIT | ALPHA)]]]]]]] + ; +street_name = VCHAR, {VCHAR} ; + +zip_part = town_name, ",", SP, state, SP, [SP], zip_code, CRLF ; +town_name = (ALPHA | SP), {ALPHA | SP} ; +state = 2*ALPHA ; +zip_code = 5*DIGIT, ["-", 4*DIGIT] ; \ No newline at end of file diff --git a/examples/isoebnf/iso-ebnf.ebnf b/examples/isoebnf/iso-ebnf.ebnf new file mode 100644 index 0000000..5440e4c --- /dev/null +++ b/examples/isoebnf/iso-ebnf.ebnf @@ -0,0 +1,110 @@ +# W3C EBNF for ISO/IEC 14977 : 1996 EBNF +# (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) + +[1] syntax ::= syntax_rule* + +[2] syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol + +[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)* + +[4] single_definition ::= term (',' term)* + +[5] term ::= factor ('-' exception)? + +[6] exception ::= factor + +[7] factor ::= (integer '*')? primary + +[8] primary ::= optional_sequence + | repeated_sequence + | special_sequence + | grouped_sequence + | meta_identifier + | terminal_string + | empty + +[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol + +[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol + +[11] grouped_sequence ::= '(' definitions_list ')' + +# Note, the following are nominally terminal rules, +# although ISO EBNF does not really distinguish between non-terminal and terminal rules. 
+ +@terminals + +[12] letter ::= [a-zA-Z] +[13] decimal_digit ::= [0-9] + +[14] integer ::= decimal_digit+ + +[15] meta_identifier ::= letter meta_identifier_character* + +# Extended to allow '_' +[16] meta_identifier_character ::= letter | decimal_digit | '_' + +[17] terminal_string ::= ("'" first_terminal_character+ "'") + | ('"' second_terminal_character+ '"') + +[18] first_terminal_character ::= terminal_character - "'" + +[19] second_terminal_character ::= terminal_character - '"' + +[20] special_sequence ::= '?' special_sequence_character* '?' + +[21] special_sequence_character ::= terminal_character - '?' + +[22] terminal_character ::= letter + | decimal_digit + | concatenate_symbol + | defining_symbol + | definition_separator_symbol + | end_comment_symbol + | end_group_symbol + | end_option_symbol + | end_repeat_symbol + | except_symbol + | first_quote_symbol + | repetition_symbol + | second_quote_symbol + | special_sequence_symbol + | start_comment_symbol + | start_group_symbol + | start_option_symbol + | start_repeat_symbol + | terminator_symbol + | other_character + +[23] other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' + +[24] gap_separator ::= [#x9#xa#xb#xc#xd#x20] + +@pass gap_separator+ + +[25] empty ::= '' + +# Simple terminals that are often extended +[26] defining_symbol ::= '=' | ':' +[27] definition_separator_symbol ::= '|' | '/' | '!' +[28] terminator_symbol ::= ';' | '.' +[29] start_option_symbol ::= '[' | '(/' +[30] end_option_symbol ::= ']' | '/)' +[31] start_repeat_symbol ::= '{' | '(:' +[32] end_repeat_symbol ::= '}' | ':)' + +# Symbols described, but not actually used. 
+ +[33] gap_free_symbol ::= (terminal_character - ['"]) + | terminal_string + +[34] repetition_symbol ::= '*' +[35] except_symbol ::= '-' +[36] concatenate_symbol ::= ',' +[37] first_quote_symbol ::= "'" +[38] second_quote_symbol ::= '"' +[39] start_comment_symbol ::= '(*' +[40] end_comment_symbol ::= '*)' +[41] start_group_symbol ::= '(' +[42] end_group_symbol ::= ')' +[43] special_sequence_symbol ::= '?' diff --git a/examples/isoebnf/iso-ebnf.peg.sxp b/examples/isoebnf/iso-ebnf.peg.sxp new file mode 100644 index 0000000..2f85a2e --- /dev/null +++ b/examples/isoebnf/iso-ebnf.peg.sxp @@ -0,0 +1,74 @@ +( + (pass _pass (plus gap_separator)) + (rule syntax "1" (star syntax_rule)) + (rule syntax_rule "2" + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (rule definitions_list "3" (seq single_definition _definitions_list_1)) + (rule _definitions_list_1 "3.1" (star _definitions_list_2)) + (rule _definitions_list_2 "3.2" (seq definition_separator_symbol definitions_list)) + (rule single_definition "4" (seq term _single_definition_1)) + (rule _single_definition_1 "4.1" (star _single_definition_2)) + (rule _single_definition_2 "4.2" (seq "," term)) + (rule term "5" (seq factor _term_1)) + (rule _term_1 "5.1" (opt _term_2)) + (rule _term_2 "5.2" (seq "-" exception)) + (rule exception "6" (seq factor)) + (rule factor "7" (seq _factor_1 primary)) + (rule _factor_1 "7.1" (opt _factor_2)) + (rule _factor_2 "7.2" (seq integer "*")) + (rule primary "8" + (alt optional_sequence repeated_sequence special_sequence grouped_sequence + meta_identifier terminal_string empty )) + (rule optional_sequence "9" + (seq start_option_symbol definitions_list end_option_symbol)) + (rule repeated_sequence "10" + (seq start_repeat_symbol definitions_list end_repeat_symbol)) + (rule grouped_sequence "11" (seq "(" definitions_list ")")) + (terminal letter "12" (range "a-zA-Z")) + (terminal decimal_digit "13" (range "0-9")) + (terminal integer "14" (plus decimal_digit)) + 
(terminal meta_identifier "15" (seq letter _meta_identifier_1)) + (rule _meta_identifier_1 "15.1" (star meta_identifier_character)) + (terminal meta_identifier_character "16" (alt letter decimal_digit "_")) + (terminal terminal_string "17" (alt _terminal_string_1 _terminal_string_2)) + (rule _terminal_string_1 "17.1" (seq "'" _terminal_string_3 "'")) + (rule _terminal_string_2 "17.2" (seq "\"" _terminal_string_4 "\"")) + (rule _terminal_string_3 "17.3" (plus first_terminal_character)) + (rule _terminal_string_4 "17.4" (plus second_terminal_character)) + (terminal first_terminal_character "18" (diff terminal_character "'")) + (terminal second_terminal_character "19" (diff terminal_character "\"")) + (terminal special_sequence "20" (seq "?" _special_sequence_1 "?")) + (rule _special_sequence_1 "20.1" (star special_sequence_character)) + (terminal special_sequence_character "21" (diff terminal_character "?")) + (terminal terminal_character "22" + (alt letter decimal_digit concatenate_symbol defining_symbol + definition_separator_symbol end_comment_symbol end_group_symbol + end_option_symbol end_repeat_symbol except_symbol first_quote_symbol + repetition_symbol second_quote_symbol special_sequence_symbol + start_comment_symbol start_group_symbol start_option_symbol + start_repeat_symbol terminator_symbol other_character )) + (terminal other_character "23" (alt _other_character_1 "\\")) + (terminal _other_character_1 "23.1" (range ":+_%@&$<>^` ̃#x20#x23")) + (terminal gap_separator "24" (range "#x9#xa#xb#xc#xd#x20")) + (terminal empty "25" (seq ())) + (terminal defining_symbol "26" (alt "=" ":")) + (terminal definition_separator_symbol "27" (alt "|" "/" "!")) + (terminal terminator_symbol "28" (alt ";" ".")) + (terminal start_option_symbol "29" (alt "[" "(/")) + (terminal end_option_symbol "30" (alt "]" "/)")) + (terminal start_repeat_symbol "31" (alt "{" "(:")) + (terminal end_repeat_symbol "32" (alt "}" ":)")) + (terminal gap_free_symbol "33" (alt _gap_free_symbol_1 
terminal_string)) + (rule _gap_free_symbol_1 "33.1" (seq _gap_free_symbol_3 terminal_character)) + (terminal _gap_free_symbol_2 "33.2" (range "'\"")) + (rule _gap_free_symbol_3 "33.3" (not _gap_free_symbol_2)) + (terminal repetition_symbol "34" (seq "*")) + (terminal except_symbol "35" (seq "-")) + (terminal concatenate_symbol "36" (seq ",")) + (terminal first_quote_symbol "37" (seq "'")) + (terminal second_quote_symbol "38" (seq "\"")) + (terminal start_comment_symbol "39" (seq "(*")) + (terminal end_comment_symbol "40" (seq "*)")) + (terminal start_group_symbol "41" (seq "(")) + (terminal end_group_symbol "42" (seq ")")) + (terminal special_sequence_symbol "43" (seq "?"))) diff --git a/examples/isoebnf/iso-ebnf.sxp b/examples/isoebnf/iso-ebnf.sxp new file mode 100644 index 0000000..cce7932 --- /dev/null +++ b/examples/isoebnf/iso-ebnf.sxp @@ -0,0 +1,61 @@ +( + (pass _pass (plus gap_separator)) + (rule syntax "1" (star syntax_rule)) + (rule syntax_rule "2" + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (rule definitions_list "3" + (seq single_definition (star (seq definition_separator_symbol definitions_list)))) + (rule single_definition "4" (seq term (star (seq "," term)))) + (rule term "5" (seq factor (opt (seq "-" exception)))) + (rule exception "6" (seq factor)) + (rule factor "7" (seq (opt (seq integer "*")) primary)) + (rule primary "8" + (alt optional_sequence repeated_sequence special_sequence grouped_sequence + meta_identifier terminal_string empty )) + (rule optional_sequence "9" + (seq start_option_symbol definitions_list end_option_symbol)) + (rule repeated_sequence "10" + (seq start_repeat_symbol definitions_list end_repeat_symbol)) + (rule grouped_sequence "11" (seq "(" definitions_list ")")) + (terminal letter "12" (range "a-zA-Z")) + (terminal decimal_digit "13" (range "0-9")) + (terminal integer "14" (plus decimal_digit)) + (terminal meta_identifier "15" (seq letter (star meta_identifier_character))) + (terminal 
meta_identifier_character "16" (alt letter decimal_digit "_")) + (terminal terminal_string "17" + (alt + (seq "'" (plus first_terminal_character) "'") + (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal first_terminal_character "18" (diff terminal_character "'")) + (terminal second_terminal_character "19" (diff terminal_character "\"")) + (terminal special_sequence "20" (seq "?" (star special_sequence_character) "?")) + (terminal special_sequence_character "21" (diff terminal_character "?")) + (terminal terminal_character "22" + (alt letter decimal_digit concatenate_symbol defining_symbol + definition_separator_symbol end_comment_symbol end_group_symbol + end_option_symbol end_repeat_symbol except_symbol first_quote_symbol + repetition_symbol second_quote_symbol special_sequence_symbol + start_comment_symbol start_group_symbol start_option_symbol + start_repeat_symbol terminator_symbol other_character )) + (terminal other_character "23" (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) + (terminal gap_separator "24" (range "#x9#xa#xb#xc#xd#x20")) + (terminal empty "25" (seq ())) + (terminal defining_symbol "26" (alt "=" ":")) + (terminal definition_separator_symbol "27" (alt "|" "/" "!")) + (terminal terminator_symbol "28" (alt ";" ".")) + (terminal start_option_symbol "29" (alt "[" "(/")) + (terminal end_option_symbol "30" (alt "]" "/)")) + (terminal start_repeat_symbol "31" (alt "{" "(:")) + (terminal end_repeat_symbol "32" (alt "}" ":)")) + (terminal gap_free_symbol "33" + (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal repetition_symbol "34" (seq "*")) + (terminal except_symbol "35" (seq "-")) + (terminal concatenate_symbol "36" (seq ",")) + (terminal first_quote_symbol "37" (seq "'")) + (terminal second_quote_symbol "38" (seq "\"")) + (terminal start_comment_symbol "39" (seq "(*")) + (terminal end_comment_symbol "40" (seq "*)")) + (terminal start_group_symbol "41" (seq "(")) + (terminal end_group_symbol "42" (seq ")")) + 
(terminal special_sequence_symbol "43" (seq "?"))) diff --git a/examples/isoebnf/meta.rb b/examples/isoebnf/meta.rb new file mode 100644 index 0000000..c903214 --- /dev/null +++ b/examples/isoebnf/meta.rb @@ -0,0 +1,69 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from iso-ebnf.ebnf +module ISOEBNFMeta + RULES = [ + EBNF::Rule.new(:syntax, "1", [:star, :syntax_rule]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:syntax_rule, "2", [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definitions_list, "3", [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_1, "3.1", [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_2, "3.2", [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:single_definition, "4", [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_1, "4.1", [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_2, "4.2", [:seq, ",", :term]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:term, "5", [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_1, "5.1", [:opt, :_term_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_2, "5.2", [:seq, "-", :exception]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:exception, "6", [:seq, :factor]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:factor, "7", [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_1, "7.1", [:opt, :_factor_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_2, "7.2", [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "8", [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:optional_sequence, "9", [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repeated_sequence, "10", [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:grouped_sequence, "11", [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:letter, "12", [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:decimal_digit, "13", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:integer, "14", [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier, "15", [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_meta_identifier_1, "15.1", [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier_character, "16", [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_string, "17", [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_1, "17.1", [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_3, "17.3", [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_2, "17.2", [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_4, "17.4", [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_terminal_character, "18", [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_terminal_character, "19", [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence, "20", [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_special_sequence_1, "20.1", [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_character, "21", [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_character, "22", [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:other_character, "23", [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_other_character_1, "23.1", [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_separator, "24", [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:plus, :gap_separator], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:empty, "25", [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defining_symbol, "26", [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definition_separator_symbol, "27", [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminator_symbol, "28", [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_option_symbol, "29", [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_option_symbol, "30", [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_repeat_symbol, "31", [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_repeat_symbol, "32", [:alt, "}", ":)"], kind: 
:terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_free_symbol, "33", [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, "33.1", [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, "33.3", [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, "33.2", [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repetition_symbol, "34", [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:except_symbol, "35", [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenate_symbol, "36", [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_quote_symbol, "37", [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_quote_symbol, "38", [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_comment_symbol, "39", [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_comment_symbol, "40", [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_group_symbol, "41", [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_group_symbol, "42", [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_symbol, "43", [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + ] +end + diff --git a/examples/isoebnf/parse b/examples/isoebnf/parse new file mode 100755 index 0000000..c00eae1 --- /dev/null +++ b/examples/isoebnf/parse @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby +# parse --- Process EBNF to generate AST S-Expression + +$:.unshift(File.expand_path("../../../lib", __FILE__)) +$:.unshift(File.expand_path("..", __FILE__)) +require 'rubygems' +require 'getoptlong' +require 'parser' +require 'sxp' + +out = STDOUT + +OPT_ARGS = [ + ["--evaluate","-e", 
GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], + ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], + ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] +] +def usage + require 'ebnf' + STDERR.puts %{#{$0} Version #{EBNF::VERSION}} + STDERR.puts %{Usage: #{$0} [options] file ...} + width = OPT_ARGS.map do |o| + l = o.first.length + l += o[1].length + 2 if o[1].is_a?(String) + l + end.max + OPT_ARGS.each do |o| + s = " %-*s " % [width, (o[1].is_a?(String) ? "#{o[0,2].join(', ')}" : o[0])] + s += o.last + STDERR.puts s + end + exit(1) +end + +options = {} +input = nil + +opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) + +opts.each do |opt, arg| + case opt + when '--evaluate' then input = arg + when '--trace' then options[:level] = arg.to_i + when '--help' then usage + end +end + +input = File.open(ARGV[0]) if ARGV[0] + +# Collect rules +ebnf = ISOEBNFPegParser.new(input || STDIN, **options) + +puts ebnf.to_sxp diff --git a/examples/isoebnf/parser.rb b/examples/isoebnf/parser.rb new file mode 100644 index 0000000..9741957 --- /dev/null +++ b/examples/isoebnf/parser.rb @@ -0,0 +1,239 @@ +# # EBNF Parser for EBNF. +# +# Produces an Abstract Synatx Tree in S-Expression form for the input grammar file +require 'ebnf' +require 'ebnf/terminals' +require 'ebnf/peg/parser' +require 'meta' +require 'sxp' +require 'logger' + +class ISOEBNFPegParser + include EBNF::PEG::Parser + + # The base for terminal-character, which omits "'", '"', and '?'. + # Could be more optimized, and one might quible + # with the overly-strictly defined character set, + # but it is correct. + TERMINAL_CHARACTER_BASE = %r{ + [a-zA-Z0-9] | # letter | decimal digit + , | # concatenate symbol + = | # defining symbol + [\|\/!] 
| # definition separator symbol + \*\) | # end comment symbol + \) | # end group symbol + \] | # end option symbol + \} | # end repeat symbol + \- | # except symbol + #\' | # first quote symbol + \* | # repetition symbol + #\" | # second quote symbol + #\? | # special sequence symbol + \(\* | # start comment symbol + \( | # start group symbol + \[ | # start option symbol + \{ | # start repeat symbol + [;\.] | # terminator symbol + [:+_%@&$<>^\x20\x23\\`~] # other character + }x + + TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]} + FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]} + SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]} + SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]} + + # Abstract syntax tree from parse + # + # @return [Array] + attr_reader :ast + + # `[14] integer ::= decimal_digit+` + terminal(:integer, /\d+/) do |value, prod| + value.to_i + end + + # `[15] meta_identifier ::= letter meta_identifier_character*` + terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value| + value.to_sym + end + + # `[17] terminal_string ::= ("'" first_terminal_character+ "'")` + # ` | ('"' second_terminal_character+ '"')` + terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value| + value[1..-2] + end + + # `[20] special_sequence ::= '?' 
special_sequence_character* '?'` + terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/) + + # `[22] terminal_character ::= [a-zA-Z0-9]` + # ` | [,=;*}#x2d?([{;]` + # ` | '*)'` + # ` | '(*'` + # ` | ']'` + # ` | other_character` + terminal(:terminal_character, TERMINAL_CHARACTER) + + # `[25] empty ::= ''` + terminal(:empty, //) + + # `[26] definition_separator_symbol ::= '|' | '/' | '!'` + terminal(:definition_separator_symbol, /[\|\/!]/) + + # `[27] terminator_symbol ::= ';' | '.'` + terminal(:terminator_symbol, /[;\.]/) + + # `[28] start_option_symbol ::= '[' | '(/'` + terminal(:start_option_symbol, /\[|\(\//) + + # `[29] end_option_symbol ::= ']' | '/)'` + terminal(:end_option_symbol, /[\]\/]/) + + # `[30] start_repeat_symbol ::= '{' | '(:'` + terminal(:start_repeat_symbol, /{|\(:/) + + # `[31] end_repeat_symbol ::= '}' | ':)'` + terminal(:end_repeat_symbol, /}|:\)/) + + # ## Non-terminal productions + + # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol` + production(:syntax_rule, clear_packrat: true) do |value, data, callback| + # value contains an expression. 
+ # Invoke callback + sym = value[0][:meta_identifier] + definitions_list = value[2][:definitions_list] + callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list)) + nil + end + + # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*` + production(:definitions_list) do |value| + if value.last[:_definitions_list_1].length > 0 + [:alt, value.first[:single_definition]] + value.last[:_definitions_list_1] + else + value.first[:single_definition] + end + end + production(:_definitions_list_1) do |value| + Array(value.first) + end + production(:_definitions_list_2) do |value| + if Array(value.last[:definitions_list]).first == :alt + value.last[:definitions_list][1..-1] + else + [value.last[:definitions_list]] + end + end + + # `[4] single_definition ::= term (',' term)*` + production(:single_definition) do |value| + if value.last[:_single_definition_1].length > 0 + [:seq, value.first[:term]] + value.last[:_single_definition_1] + else + value.first[:term] + end + end + production(:_single_definition_1) do |value| + value.map {|a1| a1.last[:term]}.compact # Get rid of '|' + end + + # `[5] term ::= factor ('-' exception)?` + production(:term) do |value| + if value.last[:_diff_1] + [:diff, value.first[:postfix], value.last[:_term_1]] + else + value.first[:factor] + end + end + production(:_term_1) do |value| + value.last[:exception] if value + end + + # `[6] exception ::= factor` + production(:exception) do |value| + value.first[:factor] + end + + # `[7] factor ::= (integer '*')? 
primary` + production(:factor) do |value| + if value.first[:_factor_1] + [:rept, value.first[:_factor_1], value.first[:_factor_1], value.last[:primary]] + else + value.last[:primary] + end + end + production(:_factor_2) do |value| + value.first[:integer] + end + + # `[8] primary ::= optional_sequence` + # ` | repeated_sequence` + # ` | special_sequence` + # ` | grouped_sequence` + # ` | meta_identifier` + # ` | terminal_string` + # ` | empty` + production(:primary) do |value| + value + end + + # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol` + production(:optional_sequence) do |value| + [:opt, value[1][:definitions_list]] + end + + # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol` + production(:repeated_sequence) do |value| + [:star, value[1][:definitions_list]] + end + + # `[11] grouped_sequence ::= '(' definitions_list ')'` + production(:grouped_sequence) do |value| + [:seq, value[1][:definitions_list]] + end + + # ## Parser invocation. + # On start, yield ourselves if a block is given, otherwise, return this parser instance + # + # @param [#read, #to_s] input + # @param [Hash{Symbol => Object}] options + # @option options [Boolean] :level + # Trace level. 0(debug), 1(info), 2(warn), 3(error). + # @return [EBNFParser] + def initialize(input, **options, &block) + # If the `level` option is set, instantiate a logger for collecting trace information. + if options.has_key?(:level) + options[:logger] = Logger.new(STDERR) + options[:logger].level = options[:level] + options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"} + end + + # Read input, if necessary, which will be used in a Scanner. + @input = input.respond_to?(:read) ? 
input.read : input.to_s + + parsing_terminals = false + @ast = [] + parse(@input, :syntax, ISOEBNFMeta::RULES, + whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, + **options + ) do |context, *data| + rule = case context + when :rule + # A rule which has already been turned into a `Rule` object. + rule = data.first + rule.kind = :terminal if parsing_terminals + rule + end + @ast << rule if rule + end + @ast + end + + # Output formatted S-Expression of grammar + def to_sxp + require 'sxp' unless defined?(SXP) + # Output rules as a formatted S-Expression + SXP::Generator.string(@ast.map(&:for_sxp)) + end +end diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 68e969b..5b4ca6f 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -7,7 +7,7 @@ module EBNF::Terminals R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze RANGE = %r(\[(?:(?:#{R_CHAR})\-(?:#{R_CHAR})|(?:#{HEX})-(?:#{HEX}))\])u.freeze ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze - ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}))u.freeze + ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}\s*::=))u.freeze LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze From 843f0ace81de2f972106a7a0fdfddd9057267414 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 27 Jun 2020 12:07:11 -0700 Subject: [PATCH 05/50] Don't eat whitespace inside terminals in PEG::Rules. --- examples/isoebnf/README.md | 6 ++++-- lib/ebnf/peg/rule.rb | 6 +++--- spec/peg/parser_spec.rb | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md index 9197188..c0590ac 100644 --- a/examples/isoebnf/README.md +++ b/examples/isoebnf/README.md @@ -66,12 +66,14 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF}. 
(rule start_repeat_symbol (alt "{" "(:")) (rule end_repeat_symbol (alt "}" ":)"))) -Note, however, that ISO EBNF doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the {file:iso-ebnf.ebnf W3C EBNF definition of the grammar} does use terminal rules. - This can then be used as input to {EBNF.parse} to transform EBNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. ebnf --input-format sxp --peg ebnf.sxp -o ebnf.peg.sxp +Note, however, that ISO EBNF doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the {file:iso-ebnf.ebnf W3C EBNF definition of the grammar} does use terminal rules. + +When parsing files with this grammar, rules that are all capitalized _will_ be treated as terminal productions, although this is a proprietary interpretation of the specification. + ## Example Walkthrough This example uses the EBNF grammar from {file:iso-ebnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar. diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index f0a2045..ee56edc 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -144,7 +144,7 @@ def parse(input) when :seq # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering. seq = expr[1..-1].each_with_object([]) do |prod, accumulator| - eat_whitespace(input) unless accumulator.empty? + eat_whitespace(input) unless accumulator.empty? || terminal? 
res = case prod when Symbol rule = parser.find_rule(prod) @@ -209,12 +209,12 @@ def rept(input, min, max, prod) rule = parser.find_rule(prod) raise "No rule found for #{prod}" unless rule while (res = rule.parse(input)) != :unmatched && (max == '*' || result.length < max) - eat_whitespace(input) + eat_whitespace(input) unless terminal? result << res end when String while (res = input.scan(Regexp.new(Regexp.quote(prod)))) && (max == '*' || result.length < max) - eat_whitespace(input) + eat_whitespace(input) unless terminal? result << res end end diff --git a/spec/peg/parser_spec.rb b/spec/peg/parser_spec.rb index 1df502f..5fffbc5 100644 --- a/spec/peg/parser_spec.rb +++ b/spec/peg/parser_spec.rb @@ -81,9 +81,10 @@ class PegParserTest { "" => %r{syntax error, expecting "0-9", :integer }, - "10 x 1" => %r{syntax error, expecting "0-9", "\+", :operator}, + "10 x 1" => %r{syntax error, expecting "\+", :operator}, "1-1" => %r{syntax error, expecting "0-9", "\+", :operator}, "foo" => %r{syntax error, expecting "0-9", :integer}, + "3 1 + 2" => %r{syntax error, expecting "\+", :operator} }.each do |input, expected| it "fails to parse #{input.inspect} to #{expected.inspect}" do expect { From 4e19c66fad44717be148c2bdf7c33537ad4b45ed Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 27 Jun 2020 12:57:48 -0700 Subject: [PATCH 06/50] Allow `start_production` to take an option, which can include `as_hash: true`, which is provided to `PEG::Rule#parse` on the return from `PEG::Parser#onStart`. This allows `seq` matching to return a hash rather than an array of hashes. 
--- examples/ebnf-peg-parser/doc/parser.html | 124 ++++++++------------ examples/ebnf-peg-parser/meta.rb | 6 +- examples/ebnf-peg-parser/parser.rb | 38 ++++--- examples/isoebnf/doc/parser.html | 113 +++++++++--------- examples/isoebnf/parser.rb | 51 ++++----- lib/ebnf/peg/parser.rb | 12 +- lib/ebnf/peg/rule.rb | 29 +++-- spec/peg/data/parser.rb | 26 +++-- spec/peg/rule_spec.rb | 139 ++++++++++++++++++++++- 9 files changed, 324 insertions(+), 214 deletions(-) diff --git a/examples/ebnf-peg-parser/doc/parser.html b/examples/ebnf-peg-parser/doc/parser.html index fe223ff..37d74a4 100644 --- a/examples/ebnf-peg-parser/doc/parser.html +++ b/examples/ebnf-peg-parser/doc/parser.html @@ -474,29 +474,13 @@

EBNF Parser for EBNF.

require 'ebnf'
 require 'ebnf/terminals'
 require 'ebnf/peg/parser'
+require 'meta'
 require 'sxp'
 require 'logger'
 
 class EBNFPegParser
   include EBNF::PEG::Parser
-  include EBNF::Terminals
-
-  class ProdResult
-    attr_accessor :prod
-    attr_accessor :values
-
-    def initialize(prod, *values)
-      @prod, @values = prod, values
-    end
-
-    def to_ary
-      values.map {|v| v.respond_to?(:to_ary) ? v.to_ary : v}.unshift(@prod)
-    end
-
-    def inspect
-      "(#{prod} #{values.map(&:inspect).join(' ')})"
-    end
-  end
+ include EBNF::Terminals @@ -620,7 +604,7 @@

Terminals

Terminal for RANGE is matched as part of a primary rule.

-
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -636,7 +620,7 @@

Terminals

Terminal for O_RANGE is matched as part of a primary rule.

-
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -777,7 +761,7 @@

Non-terminal productions

Production for end of rule non-terminal.

-

The value parameter, is of the form [{LHS: "v"}, {expression: "v"}].

+

By setting as_hash: true in the start_production, the value parameter will be in the form {LHS: "v", expression: "v"}. Otherwise, it would be expressed using an array of hashes of the form [{LHS: "v"}, {expression: "v"}].

Clears the packrat parser when called.

@@ -787,7 +771,8 @@

Non-terminal productions

-
  production(:rule, clear_packrat: true) do |value, data, callback|
+
  start_production(:rule, as_hash: true)
+  production(:rule, clear_packrat: true) do |value, data, callback|
@@ -799,8 +784,8 @@

Non-terminal productions

Invoke callback

-
    id, sym = value.first[:LHS]
-    expression = value.last[:expression]
+        
    id, sym = value[:LHS]
+    expression = value[:expression]
     callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression))
     nil
   end
@@ -836,7 +821,7 @@

Non-terminal productions

Production for end of alt non-terminal. Passes through the optimized value of the seq production as follows:

-

The value parameter, is of the form [{seq: "v"}, {_alt_1: "v"}].

+

The value parameter is of the form {seq: "v", _alt_1: "v"}.

[:seq foo] => foo
 [:seq foo bar] => [:seq foo bar]
@@ -848,11 +833,12 @@ 

Non-terminal productions

-
  production(:alt) do |value|
-    if value.last[:_alt_1].length > 0
-      [:alt, value.first[:seq]] + value.last[:_alt_1]
+        
  start_production(:alt, as_hash: true)
+  production(:alt) do |value|
+    if value[:_alt_1].length > 0
+      [:alt, value[:seq]] + value[:_alt_1]
     else
-      value.first[:seq]
+      value[:seq]
     end
   end
@@ -908,17 +894,18 @@

Non-terminal productions

Diff production returns concatenated postfix values

-

The value parameter, is of the form [{postfix: "v"}, {_diff_1: "v"}].

+

The value parameter is of the form {postfix: "v", _diff_1: "v"}.

[7] diff        ::= postfix ('-' postfix)?
 
-
  production(:diff) do |value|
-    if value.last[:_diff_1]
-      [:diff, value.first[:postfix], value.last[:_diff_1]]
+        
  start_production(:diff, as_hash: true)
+  production(:diff) do |value|
+    if value[:_diff_1]
+      [:diff, value[:postfix], value[:_diff_1]]
     else
-      value.first[:postfix]
+      value[:postfix]
     end
   end
 
@@ -935,7 +922,7 @@ 

Non-terminal productions

Production for end of postfix non-terminal. Either returns the primary production value, or as modified by the postfix.

-

The value parameter, is of the form [{primary: "v"}, {_postfix_1: "v"}].

+

The value parameter is of the form {primary: "v", _postfix_1: "v"}.

[:primary] => [:primary]
 [:primary, '*'] => [:star, :primary]
@@ -946,7 +933,8 @@ 

Non-terminal productions

-
  production(:postfix) do |value|
+
  start_production(:postfix, as_hash: true)
+  production(:postfix) do |value|
@@ -957,11 +945,11 @@

Non-terminal productions

Push result onto input stack, as the diff production can have some number of postfix values that are applied recursively

-
    case value.last[:_postfix_1]
-    when "*" then [:star, value.first[:primary]]
-    when "+" then [:plus, value.first[:primary]]
-    when "?" then [:opt, value.first[:primary]]
-    else value.first[:primary]
+        
    case value[:_postfix_1]
+    when "*" then [:star, value[:primary]]
+    when "+" then [:plus, value[:primary]]
+    when "?" then [:opt, value[:primary]]
+    else value[:primary]
     end
   end
@@ -1045,28 +1033,6 @@

Parser invocation.

-

Intantiate grammar from ebnf.ebnf

- - -
    ebnf = File.expand_path("../../../etc/ebnf.peg.sxp", __FILE__)
- - - - -
- -
-

Perform PEG-specific transformation to the associated rules, which will be passed directly to the parser.

- - -
    rules = EBNF.parse(File.open(ebnf), format: :sxp).make_peg.ast
- - - - -
- -

If the level option is set, instantiate a logger for collecting trace information.

@@ -1077,10 +1043,10 @@

Parser invocation.

end
- +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -1089,13 +1055,13 @@

Parser invocation.

parsing_terminals = false @ast = [] - parse(@input, :ebnf, rules,
+ parse(@input, :ebnf, EBNFPegMeta::RULES,
- +
- +

Use an optimized Regexp for whitespace

@@ -1107,24 +1073,24 @@

Parser invocation.

when :terminal
- +
- +

After parsing @terminals, this changes the state of the parser to treat subsequent rules as terminals.

        parsing_terminals = true
-        rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminal)
+        next
       when :pass
- +
- +

After parsing @pass, this defines a specific rule for whitespace.

@@ -1134,10 +1100,10 @@

Parser invocation.

when :rule
- +
- +

A rule which has already been turned into a Rule object.

@@ -1152,10 +1118,10 @@

Parser invocation.

end - +
- +

Output formatted S-Expression of grammar

@@ -1164,10 +1130,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP) - +
- +

Output rules as a formatted S-Expression

diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index bae601f..67f1d0c 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -51,12 +51,12 @@ module EBNFPegMeta EBNF::Rule.new(:_RANGE_1, "16.1", [:seq, "[", :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:alt, :_O_RANGE_1, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_1, "17.1", [:seq, "[^", :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :_O_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:diff, :HEX, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), @@ -92,7 +92,7 @@ module EBNFPegMeta EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - 
EBNF::Rule.new(nil, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), ] end diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 79374e7..1ef3dd1 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -131,18 +131,19 @@ class EBNFPegParser # Production for end of `rule` non-terminal. # - # The `value` parameter, is of the form `[{LHS: "v"}, {expression: "v"}]`. + # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`. # # Clears the packrat parser when called. # # Create rule from expression value and pass to callback # # [3] rule ::= LHS expression + start_production(:rule, as_hash: true) production(:rule, clear_packrat: true) do |value, data, callback| # value contains an expression. # Invoke callback - id, sym = value.first[:LHS] - expression = value.last[:expression] + id, sym = value[:LHS] + expression = value[:expression] callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression)) nil end @@ -163,7 +164,7 @@ class EBNFPegParser # Production for end of `alt` non-terminal. # Passes through the optimized value of the seq production as follows: # - # The `value` parameter, is of the form `[{seq: "v"}, {_alt_1: "v"}]`. + # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`. 
# # [:seq foo] => foo # [:seq foo bar] => [:seq foo bar] @@ -171,11 +172,12 @@ class EBNFPegParser # Note that this also may just pass through from `_alt_1` # # [5] alt ::= seq ('|' seq)* + start_production(:alt, as_hash: true) production(:alt) do |value| - if value.last[:_alt_1].length > 0 - [:alt, value.first[:seq]] + value.last[:_alt_1] + if value[:_alt_1].length > 0 + [:alt, value[:seq]] + value[:_alt_1] else - value.first[:seq] + value[:seq] end end @@ -206,14 +208,15 @@ class EBNFPegParser # `Diff` production returns concatenated postfix values # - # The `value` parameter, is of the form `[{postfix: "v"}, {_diff_1: "v"}]`. + # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`. # # [7] diff ::= postfix ('-' postfix)? + start_production(:diff, as_hash: true) production(:diff) do |value| - if value.last[:_diff_1] - [:diff, value.first[:postfix], value.last[:_diff_1]] + if value[:_diff_1] + [:diff, value[:postfix], value[:_diff_1]] else - value.first[:postfix] + value[:postfix] end end @@ -224,7 +227,7 @@ class EBNFPegParser # Production for end of `postfix` non-terminal. # Either returns the `primary` production value, or as modified by the `postfix`. # - # The `value` parameter, is of the form `[{primary: "v"}, {_postfix_1: "v"}]`. + # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`. # # [:primary] => [:primary] # [:primary, '*'] => [:star, :primary] @@ -232,13 +235,14 @@ class EBNFPegParser # [:primary, '?'] => [:opt, :primary] # # [8] postfix ::= primary POSTFIX? + start_production(:postfix, as_hash: true) production(:postfix) do |value| # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively - case value.last[:_postfix_1] - when "*" then [:star, value.first[:primary]] - when "+" then [:plus, value.first[:primary]] - when "?" 
then [:opt, value.first[:primary]] - else value.first[:primary] + case value[:_postfix_1] + when "*" then [:star, value[:primary]] + when "+" then [:plus, value[:primary]] + when "?" then [:opt, value[:primary]] + else value[:primary] end end diff --git a/examples/isoebnf/doc/parser.html b/examples/isoebnf/doc/parser.html index 4102d1a..acc74f0 100644 --- a/examples/isoebnf/doc/parser.html +++ b/examples/isoebnf/doc/parser.html @@ -515,7 +515,7 @@

EBNF Parser for EBNF.

\[ | # start option symbol \{ | # start repeat symbol [;\.] | # terminator symbol - [:+_%@&$<>^\x20\x23\\] # other character + [:+_%@&$<>^\x20\x23\\`~] # other character }x TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]} @@ -721,21 +721,23 @@

EBNF Parser for EBNF.

Non-terminal productions

-
  production(:definitions_list) do |value|
-    if value.last[:_definitions_list_1].length > 0
-      [:alt, value.first[:single_definition]] + value.last[:_definitions_list_1]
+        
  start_production(:definitions_list, as_hash: true)
+  production(:definitions_list) do |value|
+    if value[:_definitions_list_1].length > 0
+      [:alt, value[:single_definition]] + value[:_definitions_list_1]
     else
-      value.first[:single_definition]
+      value[:single_definition]
     end
   end
   production(:_definitions_list_1) do |value|
     Array(value.first)
   end
+  start_production(:_definitions_list_2, as_hash: true)
   production(:_definitions_list_2) do |value|
-    if Array(value.last[:definitions_list]).first == :alt
-      value.last[:definitions_list][1..-1]
+    if Array(value[:definitions_list]).first == :alt
+      value[:definitions_list][1..-1]
     else
-      [value.last[:definitions_list]]
+      [value[:definitions_list]]
     end
   end
@@ -748,11 +750,12 @@

Non-terminal productions

[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol

-
  production(:single_definition) do |value|
-    if value.last[:_single_definition_1].length > 0
-      [:seq, value.first[:term]] + value.last[:_single_definition_1]
+        
  start_production(:single_definition, as_hash: true)
+  production(:single_definition) do |value|
+    if value[:_single_definition_1].length > 0
+      [:seq, value[:term]] + value[:_single_definition_1]
     else
-      value.first[:term]
+      value[:term]
     end
   end
   production(:_single_definition_1) do |value|
@@ -769,11 +772,12 @@ 

Non-terminal productions

Invoke callback

-
  production(:term) do |value|
-    if value.last[:_diff_1]
-      [:diff, value.first[:postfix], value.last[:_term_1]]
+        
  start_production(:term, as_hash: true)
+  production(:term) do |value|
+    if value[:_diff_1]
+      [:diff, value[:postfix], value[:_term_1]]
     else
-      value.first[:factor]
+      value[:factor]
     end
   end
   production(:_term_1) do |value|
@@ -786,11 +790,14 @@ 

Non-terminal productions

-

[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*

+

Setting as_hash: true in the start production causes the value to be passed as a single hash, rather than as an array of hashes.

+ +

[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*

-
  production(:exception) do |value|
-    value.first[:factor]
+        
  start_production(:exception, as_hash: true)
+  production(:exception) do |value|
+    value[:factor]
   end
@@ -802,11 +809,12 @@

Non-terminal productions

[4] single_definition ::= term (',' term)*

-
  production(:factor) do |value|
-    if value.first[:_factor_1]
-      [:rept, value.first[:_factor_1], value.first[:_factor_1], value.last[:primary]]
+        
  start_production(:factor, as_hash: true)
+  production(:factor) do |value|
+    if value[:_factor_1]
+      [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
     else
-      value.last[:primary]
+      value[:primary]
     end
   end
   production(:_factor_2) do |value|
@@ -822,8 +830,8 @@ 

Non-terminal productions

[5] term ::= factor ('-' exception)?

-
  production(:primary) do |value|
-    value
+        
  production(:optional_sequence) do |value|
+    [:opt, value[1][:definitions_list]]
   end
@@ -835,8 +843,8 @@

Non-terminal productions

[6] exception ::= factor

-
  production(:optional_sequence) do |value|
-    [:opt, value[1][:definitions_list]]
+        
  production(:repeated_sequence) do |value|
+    [:star, value[1][:definitions_list]]
   end
@@ -847,35 +855,16 @@

Non-terminal productions

[7] factor ::= (integer '*')? primary

- -
  production(:repeated_sequence) do |value|
-    [:star, value[1][:definitions_list]]
-  end
- - - - -
- -
-

[8] primary ::= optional_sequence -| repeated_sequence -| special_sequence -| grouped_sequence -| meta_identifier -| terminal_string -| empty

-
  production(:grouped_sequence) do |value|
     [:seq, value[1][:definitions_list]]
   end
- +
- +

[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol

@@ -883,10 +872,10 @@

Non-terminal productions

  def initialize(input, **options, &block)
- +
- +

[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol

@@ -898,10 +887,10 @@

Non-terminal productions

end
- +
- +

[11] grouped_sequence ::= '(' definitions_list ')'

@@ -944,10 +933,10 @@

Parser invocation.

end
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -956,10 +945,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP)
- +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -969,10 +958,10 @@

Parser invocation.

end
- +
- +

A rule which has already been turned into a Rule object.

@@ -980,10 +969,10 @@

Parser invocation.

- +
- +

Output formatted S-Expression of grammar

@@ -991,10 +980,10 @@

Parser invocation.

- +
- +

Output rules as a formatted S-Expression

diff --git a/examples/isoebnf/parser.rb b/examples/isoebnf/parser.rb index 9741957..80e5f6a 100644 --- a/examples/isoebnf/parser.rb +++ b/examples/isoebnf/parser.rb @@ -107,31 +107,36 @@ class ISOEBNFPegParser nil end + # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes. + # # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*` + start_production(:definitions_list, as_hash: true) production(:definitions_list) do |value| - if value.last[:_definitions_list_1].length > 0 - [:alt, value.first[:single_definition]] + value.last[:_definitions_list_1] + if value[:_definitions_list_1].length > 0 + [:alt, value[:single_definition]] + value[:_definitions_list_1] else - value.first[:single_definition] + value[:single_definition] end end production(:_definitions_list_1) do |value| Array(value.first) end + start_production(:_definitions_list_2, as_hash: true) production(:_definitions_list_2) do |value| - if Array(value.last[:definitions_list]).first == :alt - value.last[:definitions_list][1..-1] + if Array(value[:definitions_list]).first == :alt + value[:definitions_list][1..-1] else - [value.last[:definitions_list]] + [value[:definitions_list]] end end # `[4] single_definition ::= term (',' term)*` + start_production(:single_definition, as_hash: true) production(:single_definition) do |value| - if value.last[:_single_definition_1].length > 0 - [:seq, value.first[:term]] + value.last[:_single_definition_1] + if value[:_single_definition_1].length > 0 + [:seq, value[:term]] + value[:_single_definition_1] else - value.first[:term] + value[:term] end end production(:_single_definition_1) do |value| @@ -139,11 +144,12 @@ class ISOEBNFPegParser end # `[5] term ::= factor ('-' exception)?` + start_production(:term, as_hash: true) production(:term) do |value| - if value.last[:_diff_1] - [:diff, value.first[:postfix], value.last[:_term_1]] + if value[:_diff_1] + [:diff, 
value[:postfix], value[:_term_1]] else - value.first[:factor] + value[:factor] end end production(:_term_1) do |value| @@ -151,33 +157,24 @@ class ISOEBNFPegParser end # `[6] exception ::= factor` + start_production(:exception, as_hash: true) production(:exception) do |value| - value.first[:factor] + value[:factor] end # `[7] factor ::= (integer '*')? primary` + start_production(:factor, as_hash: true) production(:factor) do |value| - if value.first[:_factor_1] - [:rept, value.first[:_factor_1], value.first[:_factor_1], value.last[:primary]] + if value[:_factor_1] + [:rept, value[:_factor_1], value[:_factor_1], value[:primary]] else - value.last[:primary] + value[:primary] end end production(:_factor_2) do |value| value.first[:integer] end - # `[8] primary ::= optional_sequence` - # ` | repeated_sequence` - # ` | special_sequence` - # ` | grouped_sequence` - # ` | meta_identifier` - # ` | terminal_string` - # ` | empty` - production(:primary) do |value| - value - end - # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol` production(:optional_sequence) do |value| [:opt, value[1][:definitions_list]] diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index 786a1a1..84b38cf 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -51,6 +51,7 @@ def self.included(base) # DSL for creating terminals and productions module ClassMethods def start_handlers; (@start_handlers ||= {}); end + def start_options; (@start_hoptions ||= {}); end def production_handlers; (@production_handlers ||= {}); end def terminal_handlers; (@terminal_handlers ||= {}); end def terminal_regexps; (@terminal_regexps ||= {}); end @@ -97,6 +98,10 @@ def terminal(term, regexp = nil, **options, &block) # # @param [Symbol] term # The rule name + # @param [Hash{Symbol => Object}] + # Options which are returned from {Parser#onStart}. 
+ # @option options [Boolean] :as_hash (false) + # If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence. # @yield [data, block] # @yieldparam [Hash] data # A Hash defined for the current production, during :start @@ -106,8 +111,9 @@ def terminal(term, regexp = nil, **options, &block) # Block passed to initialization for yielding to calling parser. # Should conform to the yield specs for #initialize # Yield to generate a triple - def start_production(term, &block) + def start_production(term, **options, &block) start_handlers[term] = block + start_options[term] = options.freeze end ## @@ -350,6 +356,9 @@ def debug(*args, &block) # Start for production # Adds data avoiable during the processing of the production + # + # @return [Hash] composed of production options. Currently only `as_hash` is supported. + # @see ClassMethods#start_production def onStart(prod) handler = self.class.start_handlers[prod] @productions << prod @@ -375,6 +384,7 @@ def onStart(prod) # explicit start handler @prod_data << {} end + return self.class.start_options.fetch(prod, {}) # any options on this production end # Finish of production diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index ee56edc..2d82e65 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -19,13 +19,14 @@ module Rule # If matched, the input position is updated and the results returned in a Hash. # # * `alt`: returns the value of the matched production or `:unmatched`. - # * `diff`: returns the string value matched, or `:unmatched`. + # * `diff`: returns the value matched, or `:unmatched`. # * `hex`: returns a string composed of the matched hex character, or `:unmatched`. - # * `opt`: returns the matched production, or `nil` if unmatched. 
- # * `plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string. - # * `range`: returns a string composed of the character matching the range, or `:unmatched`. - # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. - # * `star`: returns an array of the matches for the specified production.For Terminals, these are concatenated into a single string. + # * `opt`: returns the value matched, or `nil` if unmatched. + # * `plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string. + # * `range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched. + # * `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence. + # * `star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string. + # # @param [Scanner] input # @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production. 
def parse(input) @@ -58,7 +59,7 @@ def parse(input) else eat_whitespace(input) end - parser.onStart(sym) + start_options = parser.onStart(sym) result = case expr.first when :alt @@ -160,11 +161,15 @@ def parse(input) end accumulator << {prod.to_sym => res} end - seq == :unmatched ? - :unmatched : - (terminal? ? - seq.map(&:values).compact.join("") : # Concat values for terminal production - seq) + if seq == :unmatched + :unmatched + elsif terminal? + seq.map(&:values).compact.join("") # Concat values for terminal production + elsif start_options[:as_hash] + seq.inject {|memo, h| memo.merge(h)} + else + seq + end when :star # Result is an array of all expressions while they match, # an empty array of none match diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index ac91ec0..c426e66 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -63,11 +63,12 @@ class EBNFPegParser callback.call(:terminal) if value == '@terminals' end + start_production(:rule, as_hash: true) production(:rule, clear_packrat: true) do |value, data, callback| # current contains an expression. # Invoke callback - id, sym = value.first[:LHS] - expression = value.last[:expression] + id, sym = value[:LHS] + expression = value[:expression] callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression)) end @@ -91,11 +92,12 @@ class EBNFPegParser value.length == 1 ? 
value.first : ([:seq] + value) end + start_production(:diff, as_hash: true) production(:diff) do |value| - if value.last[:_diff_1] - [:diff, value.first[:postfix], value.last[:_diff_1]] + if value[:_diff_1] + [:diff, value[:postfix], value[:_diff_1]] else - value.first[:postfix] + value[:postfix] end end @@ -103,13 +105,14 @@ class EBNFPegParser value.last[:postfix] if value end + start_production(:postfix, as_hash: true) production(:postfix) do |value| # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively - case value.last[:_postfix_1] - when "*" then [:star, value.first[:primary]] - when "+" then [:plus, value.first[:primary]] - when "?" then [:opt, value.first[:primary]] - else value.first[:primary] + case value[:_postfix_1] + when "*" then [:star, value[:primary]] + when "+" then [:plus, value[:primary]] + when "?" then [:opt, value[:primary]] + else value[:primary] end end @@ -117,9 +120,10 @@ class EBNFPegParser Array(value).length > 2 ? value[1][:expression] : value end + start_production(:pass, as_hash: true) production(:pass) do |value, data, callback| # Invoke callback - callback.call(:pass, value.last[:expression]) + callback.call(:pass, value[:expression]) end # ## Parser invocation. 
diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb index db0dab3..1126b28 100644 --- a/spec/peg/rule_spec.rb +++ b/spec/peg/rule_spec.rb @@ -133,13 +133,148 @@ it name do rule = EBNF::Rule.new(:rule, "0", params[:rule]).extend(EBNF::PEG::Rule) rule.parser = parser - expect(parser).to receive(:onStart).with(Symbol) + expect(parser).to receive(:onStart).with(Symbol).and_return({}) expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect]) expect(parser).not_to receive(:onTerminal).with(Symbol) expect(rule.parse(EBNF::LL1::Scanner.new(params[:input]))).to eql(params[:expect]) end end + + context "with as_hash: true" do + { + "(alt 'A' 'B') with 'A'" => { + rule: [:alt, "A", "B"], + input: "A", + expect: "A" + }, + "(alt 'A' 'B') with ' A '" => { + rule: [:alt, "A", "B"], + input: " A ", + expect: "A" + }, + "(alt 'A' 'B') with 'B'" => { + rule: [:alt, "A", "B"], + input: "B", + expect: "B" + }, + "(alt 'A' 'B') with 'C'" => { + rule: [:alt, "A", "B"], + input: "C", + expect: :unmatched + }, + "(not A) with 'A'" => { + rule: [:not, "A"], + input: "A", + expect: :unmatched + }, + "(not A) with 'B'" => { + rule: [:not, "A"], + input: "B", + expect: nil + }, + "(opt A) with 'A'" => { + rule: [:opt, "A"], + input: "A", + expect: "A" + }, + "(opt A) with 'A' and whitespace" => { + rule: [:opt, "A"], + input: " A", + expect: "A" + }, + "(opt A) with 'B'" => { + rule: [:opt, "A"], + input: "B", + expect: nil + }, + "(plus A) with ''" => { + rule: [:plus, "A"], + input: "", + expect: :unmatched + }, + "(plus A) with 'A'" => { + rule: [:plus, "A"], + input: "A", + expect: %w(A) + }, + "(plus A) with 'A B'" => { + rule: [:plus, "A"], + input: "A B", + expect: %w(A) + }, + "(plus A) with 'AAA'" => { + rule: [:plus, "A"], + input: "AAA", + expect: %w(A A A) + }, + "(plus A) with ' A A A '" => { + rule: [:plus, "A"], + input: " A A A ", + expect: %w(A A A) + }, + "(seq 'A' 'B')" => { + rule: [:seq, "A", "B"], + input: "A B", + expect: {A: "A", B: 
"B"} + }, + "(seq 'A' 'B') with no whitespace" => { + rule: [:seq, "A", "B"], + input: "AB", + expect: {A: "A", B: "B"} + }, + "(seq 'A' 'B') with added whitespace" => { + rule: [:seq, "A", "B"], + input: " A B ", + expect: {A: "A", B: "B"} + }, + "(seq 'A' 'B') with 'A'" => { + rule: [:seq, "A", "B"], + input: " A ", + expect: :unmatched + }, + "(seq 'A' 'B') with 'AC'" => { + rule: [:seq, "A", "B"], + input: "AC", + expect: :unmatched + }, + "(star A) with ''" => { + rule: [:star, "A"], + input: "", + expect: [] + }, + "(star A) with 'A'" => { + rule: [:star, "A"], + input: "A", + expect: %w(A) + }, + "(star A) with 'A B'" => { + rule: [:star, "A"], + input: "A B", + expect: %w(A) + }, + "(star A) with 'AAA'" => { + rule: [:star, "A"], + input: "AAA", + expect: %w(A A A) + }, + "(star A) with ' A A A '" => { + rule: [:star, "A"], + input: " A A A ", + expect: %w(A A A) + }, + }.each do |name, params| + it name do + rule = EBNF::Rule.new(:rule, "0", params[:rule]).extend(EBNF::PEG::Rule) + rule.parser = parser + expect(parser).to receive(:onStart).with(Symbol).and_return({as_hash: true}) + expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect]) + expect(parser).not_to receive(:onTerminal).with(Symbol) + + expect(rule.parse(EBNF::LL1::Scanner.new(params[:input]))).to eql(params[:expect]) + end + end + end end context "terminal rules" do @@ -263,7 +398,7 @@ it name do rule = EBNF::Rule.new(:rule, "0", params[:rule], kind: :terminal).extend(EBNF::PEG::Rule) rule.parser = parser - expect(parser).to receive(:onStart).with(Symbol) + expect(parser).to receive(:onStart).with(Symbol).and_return({}) expect(parser).to receive(:onFinish).with(params[:expect]).and_return(params[:expect]) expect(parser).not_to receive(:onTerminal) expect(parser).to receive(:find_terminal_regexp).with(:rule) From 8697631eda618eace11863072d4a9326ac938c73 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 27 Jun 2020 14:50:40 -0700 Subject: [PATCH 07/50] * Split 
SYMBOL into SYMBOL_BASE and make SYMBOL have a negative lookahead of `::=` to avoid matching a new rule. * Fix RANGE and O_RANGE to allow repeated ranges, and update the grammar per the original EBNF definition. --- etc/ebnf.ebnf | 6 +- etc/ebnf.html | 6 +- etc/ebnf.ll1.sxp | 7 +- etc/ebnf.peg.rb | 17 ++- etc/ebnf.peg.sxp | 17 ++- etc/ebnf.sxp | 7 +- examples/ebnf-ll1-parser/README.md | 7 +- examples/ebnf-ll1-parser/doc/parser.html | 14 +- examples/ebnf-ll1-parser/meta.rb | 5 +- examples/ebnf-ll1-parser/parser.rb | 6 +- examples/ebnf-peg-parser/README.md | 7 +- examples/ebnf-peg-parser/doc/parser.html | 2 +- examples/ebnf-peg-parser/meta.rb | 17 ++- examples/ebnf-peg-parser/parser.rb | 2 +- examples/isoebnf/iso-ebnf.ebnf | 170 +++++++++++------------ examples/isoebnf/iso-ebnf.peg.sxp | 124 ++++++++--------- examples/isoebnf/iso-ebnf.sxp | 89 ++++++------ examples/isoebnf/meta.rb | 122 ++++++++-------- lib/ebnf/parser.rb | 2 +- lib/ebnf/peg/rule.rb | 2 +- lib/ebnf/terminals.rb | 13 +- spec/parser_spec.rb | 17 +++ spec/rule_spec.rb | 4 +- 23 files changed, 342 insertions(+), 321 deletions(-) diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 457bfab..a76a55a 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -32,7 +32,7 @@ @terminals - [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::=' + [11] LHS ::= ('[' SYMBOL ']' ' '+)? 
SYMBOL ' '* '::=' [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+ @@ -42,9 +42,9 @@ [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - [16] RANGE ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' - [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' # Strings are unescaped Unicode, excepting control characters and hash (#) [18] STRING1 ::= '"' (CHAR - '"')* '"' diff --git a/etc/ebnf.html b/etc/ebnf.html index 86bb738..0946fc6 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -93,7 +93,7 @@ LHS ::= -("[" SYMBOL+ "]" #x20+)? SYMBOL #x20* "::=" +("[" SYMBOL "]" #x20+)? SYMBOL #x20* "::=" @@ -133,7 +133,7 @@ RANGE ::= -"[" (R_CHAR "-" R_CHAR) | (HEX "-" HEX) "]" +"[" (R_CHAR "-" R_CHAR | HEX "-" HEX)+ "]" @@ -141,7 +141,7 @@ O_RANGE ::= -"[^" (R_CHAR "-" R_CHAR) | (HEX "-" HEX) "]" +"[^" (R_CHAR "-" R_CHAR | HEX "-" HEX)+ "]" diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index baa914f..65408e7 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -153,13 +153,14 @@ (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "@pass" "@terminals" LHS _eof) (seq expression)) - (terminal LHS "11" (seq (opt (seq "[" (plus SYMBOL) "]" (plus " "))) SYMBOL (star " ") "::=")) + (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR 
"-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 226155f..575975c 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -21,9 +21,8 @@ module Meta EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :_LHS_4, "]", :_LHS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_4, "11.4", [:plus, :SYMBOL], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_5, "11.5", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule), @@ -47,15 +46,15 @@ module Meta EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:RANGE, "16", [:alt, :_RANGE_1, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:seq, "[", 
:_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_RANGE, "17", [:alt, :_O_RANGE_1, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:seq, "[^", :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :_O_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 5cb9433..df6697b 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -21,9 +21,8 @@ (terminal LHS "11" (seq _LHS_1 SYMBOL _LHS_2 "::=")) (terminal _LHS_1 "11.1" (opt _LHS_3)) (terminal _LHS_2 "11.2" (star 
" ")) - (terminal _LHS_3 "11.3" (seq "[" _LHS_4 "]" _LHS_5)) - (terminal _LHS_4 "11.4" (plus SYMBOL)) - (terminal _LHS_5 "11.5" (plus " ")) + (terminal _LHS_3 "11.3" (seq "[" SYMBOL "]" _LHS_4)) + (terminal _LHS_4 "11.4" (plus " ")) (terminal SYMBOL "12" (plus _SYMBOL_1)) (terminal _SYMBOL_1 "12.1" (alt _SYMBOL_2 _SYMBOL_3 _SYMBOL_4 "_" ".")) (terminal _SYMBOL_2 "12.2" (range "a-z")) @@ -46,14 +45,14 @@ (terminal _O_ENUM_2 "15.2" (seq _O_ENUM_4 "]")) (terminal _O_ENUM_3 "15.3" (plus R_CHAR)) (terminal _O_ENUM_4 "15.4" (plus HEX)) - (terminal RANGE "16" (alt _RANGE_1 _RANGE_2)) - (terminal _RANGE_1 "16.1" (seq "[" _RANGE_3)) - (terminal _RANGE_2 "16.2" (seq _RANGE_4 "]")) + (terminal RANGE "16" (seq "[" _RANGE_1 "]")) + (terminal _RANGE_1 "16.1" (plus _RANGE_2)) + (terminal _RANGE_2 "16.2" (alt _RANGE_3 _RANGE_4)) (terminal _RANGE_3 "16.3" (seq R_CHAR "-" R_CHAR)) (terminal _RANGE_4 "16.4" (seq HEX "-" HEX)) - (terminal O_RANGE "17" (alt _O_RANGE_1 _O_RANGE_2)) - (terminal _O_RANGE_1 "17.1" (seq "[^" _O_RANGE_3)) - (terminal _O_RANGE_2 "17.2" (seq _O_RANGE_4 "]")) + (terminal O_RANGE "17" (seq "[^" _O_RANGE_1 "]")) + (terminal _O_RANGE_1 "17.1" (plus _O_RANGE_2)) + (terminal _O_RANGE_2 "17.2" (alt _O_RANGE_3 _O_RANGE_4)) (terminal _O_RANGE_3 "17.3" (seq R_CHAR "-" R_CHAR)) (terminal _O_RANGE_4 "17.4" (seq HEX "-" HEX)) (terminal STRING1 "18" (seq "\"" _STRING1_1 "\"")) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index a00edab..1d4d831 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -11,13 +11,14 @@ (rule primary "9" (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) - (terminal LHS "11" (seq (opt (seq "[" (plus SYMBOL) "]" (plus " "))) SYMBOL (star " ") "::=")) + (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range 
"A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (seq HEX "-" HEX) "]"))) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index 655d930..c9f76e2 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -27,13 +27,14 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF} for (rule primary "9" (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) - (terminal LHS "11" (seq (opt (seq "[" (plus SYMBOL) "]" (plus " "))) SYMBOL (star " ") "::=")) + (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR 
"-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/examples/ebnf-ll1-parser/doc/parser.html b/examples/ebnf-ll1-parser/doc/parser.html index ad81f37..526fdeb 100644 --- a/examples/ebnf-ll1-parser/doc/parser.html +++ b/examples/ebnf-ll1-parser/doc/parser.html @@ -478,7 +478,7 @@

EBNF Parser for EBNF.

require 'sxp' require 'logger' -class EBNLL1FParser +class EBNFLL1Parser include EBNF::LL1::Parser include EBNFParserMeta include EBNF::Terminals
@@ -558,7 +558,7 @@

Terminals

  terminal(:LHS, LHS) do |prod, token, input|
-    input[:id], input[:symbol] = token.value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first
+    input[:id], input[:symbol] = token.value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first
   end
@@ -633,7 +633,7 @@

Terminals

Terminal for RANGE is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

-
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -649,7 +649,7 @@

Terminals

Terminal for O_RANGE is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

-
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX - HEX) ']'
+
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -1013,7 +1013,9 @@

Non-terminal productions

  production(:pass) do |input, data, callback|
-    input[:pass] = data[:expression]
+    expression = data[:expression]
+    expression = expression.to_ary if expression.respond_to?(:to_ary)
+    input[:pass] = expression
   end
@@ -1085,7 +1087,7 @@

Parser invocation.

        parsing_terminals = true
-        rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminal)
+        next
       when :pass
diff --git a/examples/ebnf-ll1-parser/meta.rb b/examples/ebnf-ll1-parser/meta.rb index 9e9c081..7cd0756 100644 --- a/examples/ebnf-ll1-parser/meta.rb +++ b/examples/ebnf-ll1-parser/meta.rb @@ -1,8 +1,7 @@ -# This file is automatically generated by /Users/gregg/Projects/ebnf/lib/ebnf/ll1.rb -# BRANCH derived from ../../etc/ebnf.ebnf +# This file is automatically generated by ebnf version 2.0.0 +# Derived from ../../etc/ebnf.ebnf module EBNFParserMeta START = :ebnf - BRANCH = { :alt => { "(" => [:seq, :_alt_1], diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index dd87f04..0ea4ff5 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ -53,7 +53,7 @@ def inspect # # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::=' terminal(:LHS, LHS) do |prod, token, input| - input[:id], input[:symbol] = token.value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first + input[:id], input[:symbol] = token.value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first end # Match `SYMBOL` terminal @@ -272,7 +272,9 @@ def inspect # # [10] pass ::= '@pass' expression production(:pass) do |input, data, callback| - input[:pass] = data[:expression].to_ary + expression = data[:expression] + expression = expression.to_ary if expression.respond_to?(:to_ary) + input[:pass] = expression end # ## Parser invocation. diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index 8e673c1..efa116c 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -27,13 +27,14 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF}. 
(rule primary "9" (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) - (terminal LHS "11" (seq (opt (seq "[" (plus SYMBOL) "]" (plus " "))) SYMBOL (star " ") "::=")) + (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (alt (seq "[" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) - (terminal O_RANGE "17" (alt (seq "[^" (seq R_CHAR "-" R_CHAR)) (seq (diff HEX HEX) "]"))) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/examples/ebnf-peg-parser/doc/parser.html b/examples/ebnf-peg-parser/doc/parser.html index 37d74a4..cefa617 100644 --- a/examples/ebnf-peg-parser/doc/parser.html +++ b/examples/ebnf-peg-parser/doc/parser.html @@ -531,7 +531,7 @@

Terminals

  terminal(:LHS, LHS) do |value, prod|
-    value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first
+    value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first
   end
diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index 67f1d0c..2aaa876 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -21,9 +21,8 @@ module EBNFPegMeta EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :_LHS_4, "]", :_LHS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_4, "11.4", [:plus, :SYMBOL], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_LHS_5, "11.5", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule), @@ -47,15 +46,15 @@ module EBNFPegMeta EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:RANGE, "16", [:alt, :_RANGE_1, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:seq, "[", :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, 
:_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_RANGE, "17", [:alt, :_O_RANGE_1, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:seq, "[^", :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :_O_RANGE_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 1ef3dd1..ac573ac 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -34,7 +34,7 @@ class EBNFPegParser # # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? 
SYMBOL ' '* '::=' terminal(:LHS, LHS) do |value, prod| - value.to_s.scan(/\[([^\]]+)\]\s*(\w+)\s*::=/).first + value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first end # Match `SYMBOL` terminal diff --git a/examples/isoebnf/iso-ebnf.ebnf b/examples/isoebnf/iso-ebnf.ebnf index 5440e4c..78157b8 100644 --- a/examples/isoebnf/iso-ebnf.ebnf +++ b/examples/isoebnf/iso-ebnf.ebnf @@ -1,110 +1,110 @@ # W3C EBNF for ISO/IEC 14977 : 1996 EBNF # (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) -[1] syntax ::= syntax_rule* - -[2] syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol - -[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)* - -[4] single_definition ::= term (',' term)* - -[5] term ::= factor ('-' exception)? - -[6] exception ::= factor - -[7] factor ::= (integer '*')? primary +syntax ::= syntax_rule* + +syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol + +definitions_list ::= single_definition (definition_separator_symbol definitions_list)* + +single_definition ::= term (',' term)* + +term ::= factor ('-' exception)? + +exception ::= factor + +factor ::= (integer '*')? 
primary -[8] primary ::= optional_sequence - | repeated_sequence - | special_sequence - | grouped_sequence - | meta_identifier - | terminal_string - | empty +primary ::= optional_sequence + | repeated_sequence + | special_sequence + | grouped_sequence + | meta_identifier + | terminal_string + | empty -[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol +optional_sequence ::= start_option_symbol definitions_list end_option_symbol -[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol +repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol -[11] grouped_sequence ::= '(' definitions_list ')' +grouped_sequence ::= '(' definitions_list ')' # Note, the following are nominally terminal rules, # although ISO EBNF does not really distinguish between non-terminal and terminal rules. @terminals -[12] letter ::= [a-zA-Z] -[13] decimal_digit ::= [0-9] +letter ::= [a-zA-Z] +decimal_digit ::= [0-9] -[14] integer ::= decimal_digit+ +integer ::= decimal_digit+ -[15] meta_identifier ::= letter meta_identifier_character* +meta_identifier ::= letter meta_identifier_character* # Extended to allow '_' -[16] meta_identifier_character ::= letter | decimal_digit | '_' - -[17] terminal_string ::= ("'" first_terminal_character+ "'") - | ('"' second_terminal_character+ '"') - -[18] first_terminal_character ::= terminal_character - "'" - -[19] second_terminal_character ::= terminal_character - '"' - -[20] special_sequence ::= '?' special_sequence_character* '?' - -[21] special_sequence_character ::= terminal_character - '?' 
- -[22] terminal_character ::= letter - | decimal_digit - | concatenate_symbol - | defining_symbol - | definition_separator_symbol - | end_comment_symbol - | end_group_symbol - | end_option_symbol - | end_repeat_symbol - | except_symbol - | first_quote_symbol - | repetition_symbol - | second_quote_symbol - | special_sequence_symbol - | start_comment_symbol - | start_group_symbol - | start_option_symbol - | start_repeat_symbol - | terminator_symbol - | other_character - -[23] other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' - -[24] gap_separator ::= [#x9#xa#xb#xc#xd#x20] +meta_identifier_character ::= letter | decimal_digit | '_' + +terminal_string ::= ("'" first_terminal_character+ "'") + | ('"' second_terminal_character+ '"') + +first_terminal_character ::= terminal_character - "'" + +second_terminal_character ::= terminal_character - '"' + +special_sequence ::= '?' special_sequence_character* '?' + +special_sequence_character ::= terminal_character - '?' + +terminal_character ::= letter + | decimal_digit + | concatenate_symbol + | defining_symbol + | definition_separator_symbol + | end_comment_symbol + | end_group_symbol + | end_option_symbol + | end_repeat_symbol + | except_symbol + | first_quote_symbol + | repetition_symbol + | second_quote_symbol + | special_sequence_symbol + | start_comment_symbol + | start_group_symbol + | start_option_symbol + | start_repeat_symbol + | terminator_symbol + | other_character + +other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' + +gap_separator ::= [#x9#xa#xb#xc#xd#x20] @pass gap_separator+ -[25] empty ::= '' +empty ::= '' # Simple terminals that are often extended -[26] defining_symbol ::= '=' | ':' -[27] definition_separator_symbol ::= '|' | '/' | '!' -[28] terminator_symbol ::= ';' | '.' -[29] start_option_symbol ::= '[' | '(/' -[30] end_option_symbol ::= ']' | '/)' -[31] start_repeat_symbol ::= '{' | '(:' -[32] end_repeat_symbol ::= '}' | ':)' +defining_symbol ::= '=' | ':' +definition_separator_symbol ::= '|' | '/' | '!' 
+terminator_symbol ::= ';' | '.' +start_option_symbol ::= '[' | '(/' +end_option_symbol ::= ']' | '/)' +start_repeat_symbol ::= '{' | '(:' +end_repeat_symbol ::= '}' | ':)' # Symbols described, but not actually used. -[33] gap_free_symbol ::= (terminal_character - ['"]) - | terminal_string - -[34] repetition_symbol ::= '*' -[35] except_symbol ::= '-' -[36] concatenate_symbol ::= ',' -[37] first_quote_symbol ::= "'" -[38] second_quote_symbol ::= '"' -[39] start_comment_symbol ::= '(*' -[40] end_comment_symbol ::= '*)' -[41] start_group_symbol ::= '(' -[42] end_group_symbol ::= ')' -[43] special_sequence_symbol ::= '?' +gap_free_symbol ::= (terminal_character - ['"]) + | terminal_string + +repetition_symbol ::= '*' +except_symbol ::= '-' +concatenate_symbol ::= ',' +first_quote_symbol ::= "'" +second_quote_symbol ::= '"' +start_comment_symbol ::= '(*' +end_comment_symbol ::= '*)' +start_group_symbol ::= '(' +end_group_symbol ::= ')' +special_sequence_symbol ::= '?' diff --git a/examples/isoebnf/iso-ebnf.peg.sxp b/examples/isoebnf/iso-ebnf.peg.sxp index 2f85a2e..3900ccc 100644 --- a/examples/isoebnf/iso-ebnf.peg.sxp +++ b/examples/isoebnf/iso-ebnf.peg.sxp @@ -1,74 +1,74 @@ ( - (pass _pass (plus gap_separator)) - (rule syntax "1" (star syntax_rule)) - (rule syntax_rule "2" + (terminal special_sequence_symbol (seq "?")) + (rule syntax_rule (seq meta_identifier defining_symbol definitions_list terminator_symbol)) - (rule definitions_list "3" (seq single_definition _definitions_list_1)) - (rule _definitions_list_1 "3.1" (star _definitions_list_2)) - (rule _definitions_list_2 "3.2" (seq definition_separator_symbol definitions_list)) - (rule single_definition "4" (seq term _single_definition_1)) - (rule _single_definition_1 "4.1" (star _single_definition_2)) - (rule _single_definition_2 "4.2" (seq "," term)) - (rule term "5" (seq factor _term_1)) - (rule _term_1 "5.1" (opt _term_2)) - (rule _term_2 "5.2" (seq "-" exception)) - (rule exception "6" (seq factor)) - (rule 
factor "7" (seq _factor_1 primary)) - (rule _factor_1 "7.1" (opt _factor_2)) - (rule _factor_2 "7.2" (seq integer "*")) - (rule primary "8" + (rule definitions_list (seq single_definition _definitions_list_1)) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + (rule syntax (star syntax_rule)) + (rule single_definition (seq term _single_definition_1)) + (rule term (seq factor _term_1)) + (rule exception (seq factor)) + (rule factor (seq _factor_1 primary)) + (rule primary (alt optional_sequence repeated_sequence special_sequence grouped_sequence meta_identifier terminal_string empty )) - (rule optional_sequence "9" + (rule optional_sequence (seq start_option_symbol definitions_list end_option_symbol)) - (rule repeated_sequence "10" + (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) - (rule grouped_sequence "11" (seq "(" definitions_list ")")) - (terminal letter "12" (range "a-zA-Z")) - (terminal decimal_digit "13" (range "0-9")) - (terminal integer "14" (plus decimal_digit)) - (terminal meta_identifier "15" (seq letter _meta_identifier_1)) - (rule _meta_identifier_1 "15.1" (star meta_identifier_character)) - (terminal meta_identifier_character "16" (alt letter decimal_digit "_")) - (terminal terminal_string "17" 
(alt _terminal_string_1 _terminal_string_2)) - (rule _terminal_string_1 "17.1" (seq "'" _terminal_string_3 "'")) - (rule _terminal_string_2 "17.2" (seq "\"" _terminal_string_4 "\"")) - (rule _terminal_string_3 "17.3" (plus first_terminal_character)) - (rule _terminal_string_4 "17.4" (plus second_terminal_character)) - (terminal first_terminal_character "18" (diff terminal_character "'")) - (terminal second_terminal_character "19" (diff terminal_character "\"")) - (terminal special_sequence "20" (seq "?" _special_sequence_1 "?")) - (rule _special_sequence_1 "20.1" (star special_sequence_character)) - (terminal special_sequence_character "21" (diff terminal_character "?")) - (terminal terminal_character "22" + (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal integer (plus decimal_digit)) + (terminal meta_identifier (seq letter _meta_identifier_1)) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (terminal terminal_string (alt _terminal_string_1 _terminal_string_2)) + (terminal first_terminal_character (diff terminal_character "'")) + (terminal second_terminal_character (diff terminal_character "\"")) + (terminal special_sequence (seq "?" 
_special_sequence_1 "?")) + (terminal special_sequence_character (diff terminal_character "?")) + (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol end_option_symbol end_repeat_symbol except_symbol first_quote_symbol repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (terminal other_character "23" (alt _other_character_1 "\\")) - (terminal _other_character_1 "23.1" (range ":+_%@&$<>^` ̃#x20#x23")) - (terminal gap_separator "24" (range "#x9#xa#xb#xc#xd#x20")) - (terminal empty "25" (seq ())) - (terminal defining_symbol "26" (alt "=" ":")) - (terminal definition_separator_symbol "27" (alt "|" "/" "!")) - (terminal terminator_symbol "28" (alt ";" ".")) - (terminal start_option_symbol "29" (alt "[" "(/")) - (terminal end_option_symbol "30" (alt "]" "/)")) - (terminal start_repeat_symbol "31" (alt "{" "(:")) - (terminal end_repeat_symbol "32" (alt "}" ":)")) - (terminal gap_free_symbol "33" (alt _gap_free_symbol_1 terminal_string)) - (rule _gap_free_symbol_1 "33.1" (seq _gap_free_symbol_3 terminal_character)) - (terminal _gap_free_symbol_2 "33.2" (range "'\"")) - (rule _gap_free_symbol_3 "33.3" (not _gap_free_symbol_2)) - (terminal repetition_symbol "34" (seq "*")) - (terminal except_symbol "35" (seq "-")) - (terminal concatenate_symbol "36" (seq ",")) - (terminal first_quote_symbol "37" (seq "'")) - (terminal second_quote_symbol "38" (seq "\"")) - (terminal start_comment_symbol "39" (seq "(*")) - (terminal end_comment_symbol "40" (seq "*)")) - (terminal start_group_symbol "41" (seq "(")) - (terminal end_group_symbol "42" (seq ")")) - (terminal special_sequence_symbol "43" (seq "?"))) + (terminal other_character (alt _other_character_1 "\\")) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (pass _pass (plus gap_separator)) + 
(terminal empty (seq ())) + (rule _definitions_list_1 ".1" (star _definitions_list_2)) + (rule _gap_free_symbol_1 ".1" (seq _gap_free_symbol_3 terminal_character)) + (rule _single_definition_1 ".1" (star _single_definition_2)) + (terminal _other_character_1 ".1" (range ":+_%@&$<>^` ̃#x20#x23")) + (rule _term_1 ".1" (opt _term_2)) + (rule _meta_identifier_1 ".1" (star meta_identifier_character)) + (rule _special_sequence_1 ".1" (star special_sequence_character)) + (rule _factor_1 ".1" (opt _factor_2)) + (rule _terminal_string_1 ".1" (seq "'" _terminal_string_3 "'")) + (rule _terminal_string_2 ".2" (seq "\"" _terminal_string_4 "\"")) + (rule _definitions_list_2 ".2" (seq definition_separator_symbol definitions_list)) + (rule _single_definition_2 ".2" (seq "," term)) + (terminal _gap_free_symbol_2 ".2" (range "'\"")) + (rule _term_2 ".2" (seq "-" exception)) + (rule _factor_2 ".2" (seq integer "*")) + (rule _gap_free_symbol_3 ".3" (not _gap_free_symbol_2)) + (rule _terminal_string_3 ".3" (plus first_terminal_character)) + (rule _terminal_string_4 ".4" (plus second_terminal_character))) diff --git a/examples/isoebnf/iso-ebnf.sxp b/examples/isoebnf/iso-ebnf.sxp index cce7932..4ef3718 100644 --- a/examples/isoebnf/iso-ebnf.sxp +++ b/examples/isoebnf/iso-ebnf.sxp @@ -1,61 +1,60 @@ ( - (pass _pass (plus gap_separator)) - (rule syntax "1" (star syntax_rule)) - (rule syntax_rule "2" + (terminal special_sequence_symbol (seq "?")) + (rule syntax_rule (seq meta_identifier defining_symbol definitions_list terminator_symbol)) - (rule definitions_list "3" + (rule definitions_list (seq single_definition (star (seq definition_separator_symbol definitions_list)))) - (rule single_definition "4" (seq term (star (seq "," term)))) - (rule term "5" (seq factor (opt (seq "-" exception)))) - (rule exception "6" (seq factor)) - (rule factor "7" (seq (opt (seq integer "*")) primary)) - (rule primary "8" + (rule single_definition (seq term (star (seq "," term)))) + (rule term (seq factor (opt 
(seq "-" exception)))) + (rule exception (seq factor)) + (rule factor (seq (opt (seq integer "*")) primary)) + (rule primary (alt optional_sequence repeated_sequence special_sequence grouped_sequence meta_identifier terminal_string empty )) - (rule optional_sequence "9" + (rule optional_sequence (seq start_option_symbol definitions_list end_option_symbol)) - (rule repeated_sequence "10" + (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) - (rule grouped_sequence "11" (seq "(" definitions_list ")")) - (terminal letter "12" (range "a-zA-Z")) - (terminal decimal_digit "13" (range "0-9")) - (terminal integer "14" (plus decimal_digit)) - (terminal meta_identifier "15" (seq letter (star meta_identifier_character))) - (terminal meta_identifier_character "16" (alt letter decimal_digit "_")) - (terminal terminal_string "17" + (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal integer (plus decimal_digit)) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (terminal terminal_string (alt (seq "'" (plus first_terminal_character) "'") (seq "\"" (plus second_terminal_character) "\"")) ) - (terminal first_terminal_character "18" (diff terminal_character "'")) - (terminal second_terminal_character "19" (diff terminal_character "\"")) - (terminal special_sequence "20" (seq "?" (star special_sequence_character) "?")) - (terminal special_sequence_character "21" (diff terminal_character "?")) - (terminal terminal_character "22" + (terminal first_terminal_character (diff terminal_character "'")) + (terminal second_terminal_character (diff terminal_character "\"")) + (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) + (terminal special_sequence_character (diff terminal_character "?")) + (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol end_option_symbol end_repeat_symbol except_symbol first_quote_symbol repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (terminal other_character "23" (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) - (terminal gap_separator "24" (range "#x9#xa#xb#xc#xd#x20")) - (terminal empty "25" (seq ())) - (terminal defining_symbol "26" (alt "=" ":")) - (terminal definition_separator_symbol "27" (alt "|" "/" "!")) - (terminal terminator_symbol "28" (alt ";" ".")) - (terminal start_option_symbol "29" (alt "[" "(/")) - (terminal end_option_symbol "30" (alt "]" "/)")) - (terminal start_repeat_symbol "31" (alt "{" "(:")) - (terminal end_repeat_symbol "32" (alt "}" ":)")) - (terminal gap_free_symbol "33" - (alt (diff terminal_character (range "'\"")) terminal_string)) - (terminal repetition_symbol "34" (seq "*")) - (terminal except_symbol "35" (seq "-")) - (terminal concatenate_symbol "36" (seq ",")) - (terminal first_quote_symbol "37" (seq "'")) - (terminal second_quote_symbol "38" (seq "\"")) - (terminal start_comment_symbol "39" (seq "(*")) - (terminal end_comment_symbol "40" (seq "*)")) - (terminal start_group_symbol "41" (seq "(")) - (terminal end_group_symbol "42" (seq ")")) - (terminal special_sequence_symbol "43" (seq "?"))) + (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (pass _pass (plus gap_separator)) + (terminal empty (seq ())) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol 
(alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + (rule syntax (star syntax_rule))) diff --git a/examples/isoebnf/meta.rb b/examples/isoebnf/meta.rb index c903214..fa77c7e 100644 --- a/examples/isoebnf/meta.rb +++ b/examples/isoebnf/meta.rb @@ -2,68 +2,68 @@ # Derived from iso-ebnf.ebnf module ISOEBNFMeta RULES = [ - EBNF::Rule.new(:syntax, "1", [:star, :syntax_rule]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:syntax_rule, "2", [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:definitions_list, "3", [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_definitions_list_1, "3.1", [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_definitions_list_2, "3.2", [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:single_definition, "4", [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_single_definition_1, "4.1", [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_single_definition_2, "4.2", [:seq, ",", :term]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:term, "5", [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_term_1, "5.1", [:opt, :_term_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_term_2, "5.2", [:seq, "-", 
:exception]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:exception, "6", [:seq, :factor]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:factor, "7", [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_factor_1, "7.1", [:opt, :_factor_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_factor_2, "7.2", [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "8", [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:optional_sequence, "9", [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:repeated_sequence, "10", [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:grouped_sequence, "11", [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:letter, "12", [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:decimal_digit, "13", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:integer, "14", [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:meta_identifier, "15", [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_meta_identifier_1, "15.1", [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:meta_identifier_character, "16", [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminal_string, "17", [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_1, "17.1", [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_3, "17.3", [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_2, "17.2", [:seq, "\"", 
:_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_4, "17.4", [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:first_terminal_character, "18", [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:second_terminal_character, "19", [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:special_sequence, "20", [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_special_sequence_1, "20.1", [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:special_sequence_character, "21", [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminal_character, "22", [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:other_character, "23", [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_other_character_1, "23.1", [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:gap_separator, "24", [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_definitions_list_1, ".1", [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_2, ".2", [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_1, ".1", [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_2, ".2", [:seq, ",", :term]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_1, ".1", [:opt, :_term_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_2, ".2", [:seq, "-", :exception]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_1, ".1", [:opt, :_factor_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_2, ".2", [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, 
:_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_meta_identifier_1, ".1", [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_1, ".1", [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_3, ".3", [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_2, ".2", [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_4, ".4", [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_special_sequence_1, ".1", [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_other_character_1, ".1", [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:plus, :gap_separator], kind: :pass).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:empty, "25", [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:defining_symbol, "26", [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:definition_separator_symbol, "27", [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminator_symbol, "28", [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_option_symbol, "29", [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_option_symbol, "30", [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_repeat_symbol, "31", [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_repeat_symbol, "32", [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:gap_free_symbol, "33", [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_1, "33.1", [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_3, "33.3", [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_2, "33.2", [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:repetition_symbol, "34", [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:except_symbol, "35", [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:concatenate_symbol, "36", [:seq, ","], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:first_quote_symbol, "37", [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:second_quote_symbol, "38", [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_comment_symbol, "39", [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_comment_symbol, "40", [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_group_symbol, "41", [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_group_symbol, "42", [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:special_sequence_symbol, "43", [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:empty, nil, [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_option_symbol, nil, [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_option_symbol, nil, [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, ".1", [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, ".3", [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, ".2", [:range, "'\""], kind: 
:terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), ] end diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index df3b805..360b2c8 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -44,7 +44,7 @@ def eachRule(scanner) yield r unless r.empty? @lineno = cur_lineno r = s - when s = scanner.scan(/(?:\[[\w\.]+\])\s*[\w\.]+\s*::=/) + when s = scanner.scan(EBNF::Terminals::LHS) # Found rule start, if we've already collected a rule, yield it yield r unless r.empty? #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" } diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index 2d82e65..d24dc99 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -46,7 +46,7 @@ def parse(input) # otherwise, if regexp = parser.find_terminal_regexp(sym) matched = input.scan(regexp) - result = (matched ? parser.onTerminal(sym, matched) : :unmatched) + result = parser.onTerminal(sym, (matched ? 
matched : :unmatched)) # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched parser.packrat[sym][pos] = { diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 5b4ca6f..4ad3ea7 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -1,16 +1,17 @@ # encoding: utf-8 # Terminal definitions for the EBNF grammar module EBNF::Terminals - SYMBOL = %r([a-zA-Z0-9_\.]+)u.freeze + SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze + SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze HEX = %r(\#x[a-fA-F0-9]+)u.freeze CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze - RANGE = %r(\[(?:(?:#{R_CHAR})\-(?:#{R_CHAR})|(?:#{HEX})-(?:#{HEX}))\])u.freeze + RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze - ENUM = %r((?:#{ENUM_BASE})(?!\s+#{SYMBOL}\s*::=))u.freeze - LHS = %r(\[(?:(?:#{SYMBOL})+\]\s+)?(?:#{SYMBOL})\s*::=)u.freeze - O_RANGE = %r(\[^(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX})\])u.freeze - O_ENUM = %r(\[^(?:#{R_CHAR})+\])u.freeze + ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze + LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze + O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze + O_ENUM = %r(\[^#{R_CHAR}+\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze POSTFIX = %r([?*+])u.freeze diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 0edd68b..924a2b6 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -20,6 +20,23 @@ expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) end end + + context "without rule identifiers" do + { + %{Prolog ::= BaseDecl? 
PrefixDecl*} => + %{(rule Prolog (seq (opt BaseDecl) (star PrefixDecl)))}, + %{declaration ::= '@terminals' | '@pass'} => + %{(rule declaration (alt "@terminals" "@pass"))}, + %{postfix ::= primary ( [?*+] )?} => + %{(rule postfix (seq primary (opt (range "?*+"))))}, + %{STRING2 ::= "'" (CHAR - "'")* "'"} => + %{(terminal STRING2 (seq "'" (star (diff CHAR "'")) "'"))}, + }.each do |input, expected| + it "given #{input.inspect} produces #{expected}" do + expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) + end + end + end end describe "#expression" do diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index d6c7115..6a1ae44 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -870,8 +870,8 @@ HEX: ["#x"], ENUM: ["[", :HEX, :LHS], O_ENUM: ["[^", :HEX], - RANGE: ["[", :HEX], - O_RANGE: ["[^", :HEX], + RANGE: ["["], + O_RANGE: ["[^"], STRING1: ['"'], STRING2: ["'"], CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"], From aa5804999d5aee3583b8f146fd3006f8b3db498f Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 28 Jun 2020 17:23:56 -0700 Subject: [PATCH 08/50] When creating sub-rules, don't add an id if non existed before. --- lib/ebnf/rule.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index c37f115..d860e34 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -660,7 +660,7 @@ def cclass(txt) def make_sym_id(variation = nil) @id_seq ||= 0 @id_seq += 1 - ["_#{@sym}_#{@id_seq}#{variation}".to_sym, "#{@id}.#{@id_seq}#{variation}"] + ["_#{@sym}_#{@id_seq}#{variation}".to_sym, ("#{@id}.#{@id_seq}#{variation}" if @id)] end end end \ No newline at end of file From 2464b79e3b9c7f54f857c4517376406b7bd1c781 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 28 Jun 2020 17:24:59 -0700 Subject: [PATCH 09/50] In PEG::Rule#rept, check for length before looking at another value. 
--- lib/ebnf/peg/rule.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index d24dc99..dcda348 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -213,7 +213,7 @@ def rept(input, min, max, prod) when Symbol rule = parser.find_rule(prod) raise "No rule found for #{prod}" unless rule - while (res = rule.parse(input)) != :unmatched && (max == '*' || result.length < max) + while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched eat_whitespace(input) unless terminal? result << res end From b1fde9a478720d08c0e45b79e0a5af8e21b1e0f1 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 28 Jun 2020 17:25:50 -0700 Subject: [PATCH 10/50] In PEG::Parser, allow a string to be specified for whitespace, which allows an exact match (or no match, if the string is empty). --- lib/ebnf/peg/parser.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index 84b38cf..e9fd664 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -210,6 +210,7 @@ def parse(input = nil, start = nil, rules = nil, **options, &block) @whitespace = case options[:whitespace] when Regexp then options[:whitespace] when Symbol then @rules[options[:whitespace]] + else options[:whitespace] end || @rules.values.detect(&:pass?) || /(?:\s|(?:#[^x][^\n\r]*))+/m.freeze From 7aff9de506d053f16fcab205a884b8bd290645b9 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 29 Jun 2020 11:14:13 -0700 Subject: [PATCH 11/50] ABNF parser example. 
--- README.md | 34 +- bin/ebnf | 5 - examples/abnf/README.md | 147 +++ examples/abnf/Rakefile | 38 + examples/abnf/abnf-core.ebnf | 52 + examples/abnf/abnf.abnf | 121 ++ examples/abnf/abnf.ebnf | 124 +++ examples/abnf/abnf.peg.sxp | 105 ++ examples/abnf/abnf.sxp | 44 + examples/abnf/core.rb | 23 + examples/abnf/doc/layout.mustache | 491 +++++++++ examples/abnf/doc/parser.html | 1155 ++++++++++++++++++++ examples/abnf/examples/1star.abnf | 1 + examples/abnf/examples/postal-address.abnf | 2 +- examples/abnf/meta.rb | 111 ++ examples/abnf/parse | 53 + examples/abnf/parser.rb | 269 +++++ examples/ebnf-ll1-parser/README.md | 30 +- examples/ebnf-peg-parser/README.md | 19 +- examples/isoebnf/README.md | 21 +- lib/ebnf/peg/parser.rb | 2 +- lib/ebnf/rule.rb | 17 + 22 files changed, 2809 insertions(+), 55 deletions(-) create mode 100644 examples/abnf/README.md create mode 100644 examples/abnf/Rakefile create mode 100644 examples/abnf/abnf-core.ebnf create mode 100644 examples/abnf/abnf.abnf create mode 100644 examples/abnf/abnf.ebnf create mode 100644 examples/abnf/abnf.peg.sxp create mode 100644 examples/abnf/abnf.sxp create mode 100644 examples/abnf/core.rb create mode 100644 examples/abnf/doc/layout.mustache create mode 100644 examples/abnf/doc/parser.html create mode 100644 examples/abnf/examples/1star.abnf create mode 100644 examples/abnf/meta.rb create mode 100755 examples/abnf/parse create mode 100644 examples/abnf/parser.rb diff --git a/README.md b/README.md index 9f03f99..e1f81d2 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,17 @@ ## Description This is a [Ruby][] implementation of an [EBNF][] and [BNF][] parser and parser generator. +### [PEG][]/[Packrat][] Parser +In the primary mode, it supports a Parsing Expression Grammar ([PEG][]) parser generator. This performs more minmal transformations on the parsed grammar to extract sub-productions, which allows each component of a rule to generate its own parsing event. 
+ +The resulting {EBNF::PEG::Rule} objects then parse each associated rule according to the operator semantics and use a [Packrat][] memoizer to reduce extra work when backtracking. + +These rules are driven using the {EBNF::PEG::Parser} module which invokes the starting rule and ensures that all input is consumed. + ### LL(1) Parser -In one mode, it parses [EBNF][] grammars to [BNF][], generates [First/Follow][] and Branch tables for [LL(1)][] grammars, which can be used with the stream [Tokenizer][] and [LL(1) Parser][]. +In another mode, it parses [EBNF][] grammars to [BNF][], generates [First/Follow][] and Branch tables for [LL(1)][] grammars, which can be used with the stream [Tokenizer][] and [LL(1) Parser][]. -As LL(1) grammars operate using `alt` and `seq` primitives, allowing for a match on alternative productions or a sequence of productions, generating a parser requires turning the EBNF rules into BNF: +As LL(1) grammars operate using `alt` and `seq` primitives, allowing for a match on alternative productions or a sequence of productions, generating a parser requires turning the [EBNF][] rules into [BNF][]: * Transform `a ::= b?` into `a ::= _empty | b` * Transform `a ::= b+` into `a ::= b b*` @@ -29,9 +36,6 @@ The _exception operator_ (`A - B`) is only supported on terminals. See {EBNF::LL1} and {EBNF::LL1::Parser} for further information. -### [PEG][]/[Packrat][] Parser -An additional Parsing Expression Grammar ([PEG][]) parser generator is also supported. This performs more minmal transformations on the parsed grammar to extract sub-productions, which allows each component of a rule to generate its own parsing event. 
- ## Usage ### Parsing an EBNF Grammar @@ -39,19 +43,19 @@ An additional Parsing Expression Grammar ([PEG][]) parser generator is also supp ebnf = EBNF.parse(File.open('./etc/ebnf.ebnf')) -Output rules and terminals as S-Expressions, Turtle, HTML or BNF +Output rules and terminals as [S-Expressions][S-Expression], [Turtle][], HTML or [BNF][] puts ebnf.to_sxp puts ebnf.to_ttl puts ebnf.to_html puts ebnf.to_s -Transform EBNF to PEG (generates sub-rules for embedded expressions) and the RULES table as Ruby for parsing grammars: +Transform [EBNF][] to [PEG][] (generates sub-rules for embedded expressions) and the RULES table as Ruby for parsing grammars: ebnf.make_peg ebnf.to_ruby -Transform EBNF to BNF (generates sub-rules using `alt` or `seq` from `plus`, `star` or `opt`) +Transform [EBNF][] to [BNF][] (generates sub-rules using `alt` or `seq` from `plus`, `star` or `opt`) ebnf.make_bnf @@ -87,7 +91,7 @@ which can also be proceeded by an optional number enclosed in square brackets to [1] symbol ::= expression -Symbols are written with an initial capital letter if they are the start symbol of a regular language (terminals), otherwise with an initial lowercase letter (non-terminals). Literal strings are quoted. +Symbols are written in CAPITAL CASE if they are the start symbol of a regular language (terminals), otherwise they are treated as non-terminal rules. Literal strings are quoted. Within the expression on the right-hand side of a rule, the following expressions are used to match strings of one or more characters: @@ -132,10 +136,10 @@ Within the expression on the right-hand side of a rule, the following expression * `@pass` defines the expression used to detect whitespace, which is removed in processing. * No support for `wfc` (well-formedness constraint) or `vc` (validity constraint). -Parsing this grammar yields an S-Expression version: {file:etc/ebnf.sxp} (or [LL(1)][] version {file:etc/ebnf.ll1.sxp} or [PEG][] version {file:etc/ebnf.peg.sxp}). 
+Parsing this grammar yields an [S-Expression][] version: {file:etc/ebnf.sxp} (or [LL(1)][] version {file:etc/ebnf.ll1.sxp} or [PEG][] version {file:etc/ebnf.peg.sxp}). ### Parser S-Expressions -Intermediate representations of the grammar may be serialized to Lisp-like S-Expressions. For example, the rule +Intermediate representations of the grammar may be serialized to Lisp-like [S-Expressions][S-Expression]. For example, the rule [1] ebnf ::= (declaration | rule)* @@ -192,7 +196,9 @@ For an example parser built using this gem that parses the [EBNF][] grammar, see There is also an [EBNF LL(1) Parser example](https://dryruby.github.io/ebnf/examples/ebnf-peg-parser/doc/parser.html). -The [ISO EBNF Parser](https://dryruby.github.io/ebnf/examples/iso-ebnf/doc/parser.html) example parses [ISO/IEC 14977][] into S-Expressions, which can be used to parse compatible grammars using this parser (either PEG or LL(1)). +The [ISO EBNF Parser](https://dryruby.github.io/ebnf/examples/isoebnf/doc/parser.html) example parses [ISO/IEC 14977][] into [S-Expressions][S-Expression], which can be used to parse compatible grammars using this parser (either [PEG][] or [LL(1)][]). + +The [ABNF Parser](https://dryruby.github.io/ebnf/examples/abnf/doc/parser.html) example parses [ABNF][] into [S-Expressions][S-Expression], which can be used to parse compatible grammars using this [PEG][] parser. 
## Acknowledgements Much of this work, particularly the generic parser, is inspired by work originally done by @@ -237,6 +243,7 @@ A copy of the [Turtle EBNF][] and derived parser files are included in the repos [YARD]: https://yardoc.org/ [YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md [PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html +[ABNF]: https://www.rfc-editor.org/rfc/rfc5234 [BNF]: https://en.wikipedia.org/wiki/Backus–Naur_form [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation [EBNF doc]: https://rubydoc.info/github/dryruby/ebnf @@ -245,9 +252,10 @@ A copy of the [Turtle EBNF][] and derived parser files are included in the repos [LL(1)]: https://www.csd.uwo.ca/~moreno//CS447/Lectures/Syntax.html/node14.html [LL(1) Parser]: https://en.wikipedia.org/wiki/LL_parser [Logger]: https://ruby-doc.org/stdlib-2.4.0/libdoc/logger/rdoc/Logger.html +[S-expression]: https://en.wikipedia.org/wiki/S-expression [Tokenizer]: https://en.wikipedia.org/wiki/Lexical_analysis#Tokenizer +[Turtle]: https://www.w3.org/TR/2012/WD-turtle-20120710/ [Turtle EBNF]: https://dvcs.w3.org/hg/rdf/file/default/rdf-turtle/turtle.bnf [Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/ [PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar -[Treetop]: https://rubygems.org/gems/treetop [Haml]: https://rubygems.org/gems/haml diff --git a/bin/ebnf b/bin/ebnf index bb6ad36..d3a1720 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -70,11 +70,6 @@ opts.each do |opt, arg| end end -if options[:output_format] == :rb && !(options[:ll1] || options[:peg]) - STDERR.puts "outputing in .rb format requires --ll or --peg" - exit(1) -end - input = File.open(ARGV[0]) if ARGV[0] ebnf = EBNF.parse(input || STDIN, **options) diff --git a/examples/abnf/README.md b/examples/abnf/README.md new file mode 100644 index 0000000..1eec76c --- /dev/null +++ b/examples/abnf/README.md @@ -0,0 +1,147 @@ +# IETF ABNF Parser example + +This example implements an [ABNF][] 
parser which parses compatible grammars into [S-Expressions][S-Expression]. This allows the resulting [S-Expressions][S-Expression] to drive a PEG Parser to parse documents defined using [ABNF][]. + +## Parsing the Grammar + + require 'ebnf' + + abnf = ABNFParser.new(File.open("abnf.abnf")) + +Output rules and terminals as [S-Expressions][S-Expression]: + + puts abnf.to_sxp + +This generates an [S-Expression][] form of the grammar suitable for use by {EBNF}. + + ( + (rule rulelist (plus (alt rule (seq (star c-wsp) c-nl)))) + (rule rule (seq rulename defined-as elements c-nl)) + (rule rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (rule defined-as (seq (star c-wsp) (alt "=" "=/") (star c-wsp))) + (rule elements (seq alternation (star c-wsp))) + (rule c-wsp (alt WSP (seq c-nl WSP))) + (rule c-nl (alt comment CRLF)) + (rule comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (rule alternation + (seq concatenation (star (seq (star c-wsp) "/" (star c-wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c-wsp) repetition)))) + (rule repetition (seq (opt repeat) element)) + (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) + (rule element (alt rulename group option char-val num-val prose-val)) + (rule group (seq "(" (star c-wsp) alternation (star c-wsp) ")")) + (rule option (seq "[" (star c-wsp) alternation (star c-wsp) "]")) + (rule char-val (alt case-insensitive-string case-sensitive-string)) + (rule case-insensitive-string (seq (opt "%i") quoted-string)) + (rule case-sensitive-string (seq "%s" quoted-string)) + (rule quoted-string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7e"))) DQUOTE)) + (rule num-val (seq "%" (alt bin-val dec-val hex-val))) + (rule bin-val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (rule dec-val + (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) + (rule hex-val + (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) + (rule prose-val (seq "<" (star (alt (range "#x20-#x3d") (range "#x3f-#x7e"))) ">")) + (terminal ALPHA (alt (range "#x41-#x5a") (range "#x61-#x7a"))) + (terminal BIT (alt "0" "1")) + (terminal CHAR (range "#x1-#x7f")) + (terminal CR (hex "#x0D")) + (terminal CRLF (seq (opt CR) LF)) + (terminal CTL (alt (range "#x0-#x1f") (hex "#x7F"))) + (terminal DIGIT (range "#x30-#x39")) + (terminal DQUOTE (hex "#x22")) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) + (terminal HTAB (hex "#x09")) + (terminal LF (hex "#x0A")) + (terminal LWSP (star (alt WSP (seq CRLF WSP)))) + (terminal OCTET (range "#x0-#xff")) + (terminal SP (hex "#x20")) + (terminal VCHAR (range "#x21-#x7e")) + (terminal WSP (alt SP HTAB))) + +This can then be used as input to {EBNF.parse} to transform ABNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. + + ebnf --input-format sxp --peg abnf.sxp -o abnf.peg.sxp + +Note, however, that ABNF doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings as the only terminals. + +When parsing files with this grammar, rules that are all capitalized _will_ be treated as terminal productions, although this is a proprietary interpretation of the specification. + +## Example Walkthrough + +This example uses the [EBNF][] grammar from {file:abnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar. It also uses {file:abnf-core.ebnf} to create {file:core} which contains the core ABNF terminals useable by ABNF grammars without specifically defining them. + +The first step is defining regular expressions for terminals used within the grammar. Note that the parser can operate without terminal definitions, but this can greatly improve parser performance. + +The {file:parser} is implemented using the {ABNFParser} class, which includes {EBNF::PEG::Parser}. 
+ +### Parser basics +The parser operates directly using the rules from the abstract syntax tree generated by transforming the original [EBNF][] grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Terminals are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). + +The parser starts with the specified rule, `rulelist` in this case, and executes that rule, which is expected to completely parse the input file potentially leaving some whitespace. + +Non-terminal rules have an expression using one of the following: + +`seq` +: A sequence of rules or terminals. If any (other than `opt` or `star`) do not parse, the rule is terminated as unmatched. +`opt` +: An optional rule or terminal. It either results in the matching rule or returns `nil`. +`alt` +: A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. +`plus` +: A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input. +`rept m n` +: A sequence of at least `m` and at most `n` of the matching rule. It will always return an array. +`star` +: A sequence of zero or more of the matching rule. It will always return an array. + +The starting rule is of the form `(rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))` which will attempt to parse the alternation repeatedly until the end of input. 
+ +If a rule matches, it enters a _production_, which may invoke a _start production_ before matching is attempted, and will call any _production_ whether matched or unmatched. In the case of this parser, the _start production_ is used to declare the `as_hash` option on sequences, which causes the matched values to be represented using a Hash, rather than an array of hashes for each element of the sequence, which is the default behavior. The _production_ may choose to evaluate the returned abstract syntax tree to simplify the result, or create some semantic representation of that value. + +Due to the nature of [PEG][] parsers, the same rule may be attempted at the same input location many times; this is optimized by use of a [Packrat][] memoizing cache, which remembers the result of a previous successful evaluation and short-circuits further execution. + +Processing continues by continuing to look for productions sequence and pushing those productions onto the stack. When a production is complete, any associated _production handler_ is invoked, after popping off the top of the `prod_data` stack. The just-removed hash is passed as `current` to the _production handler_. This is typically where the work of the parser happens. See [Production definitions](#Production_definitions) for more information. + +### Terminal definitions +The {file:parser} uses a DSL to specify `terminals` and `productions` associated with rules in the grammar. Each `terminal` specifies the rule name, associated regular expression, and a block which is invoked when the parser recognizes the terminal: + + terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value| + value[1..-2] + end + +In this terminal definition, the `quoted_string` is treated as a terminal and is recognized using the `/"[\x20-\x21\x23-\x7E]*"/` regular expression. When found, the value of the string is returned, minus the beginning and ending double-quote characters, for use by productions which include it. 
+ +### Production definitions +Looking at the grammar itself, we can see that the first declaration is + + rulelist ::= ( rule | (c_wsp* c_nl) )+ + +which corresponds to the original ABNF rule + + rulelist = 1*( rule / (*c-wsp c-nl) ) + +In some cases, [ABNF][] can represent expressions not directly available in [EBNF][], such as the following portion from {file:examples/postal-address.abnf}: + + house-num = 1*8(DIGIT / ALPHA) + +In [EBNF][], this would be represented more explicitly: + + house_num ::= (DIGIT / ALPHA) + ((DIGIT / ALPHA) + ((DIGIT / ALPHA) + ((DIGIT / ALPHA) + {(DIGIT / ALPHA) + (DIGIT / ALPHA) + ((DIGIT / ALPHA) + (DIGIT / ALPHA)?)?}?)?)?)? + +However, there is a `rept` [S-Expression][] that represents the min/max repeating capability more directly. + + (rule house_num (rept 1 8 (alt DIGIT ALPHA))) + +[EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation +[Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/ +[ABNF]: https://www.rfc-editor.org/rfc/rfc5234 +[S-expression]: https://en.wikipedia.org/wiki/S-expression diff --git a/examples/abnf/Rakefile b/examples/abnf/Rakefile new file mode 100644 index 0000000..fb20295 --- /dev/null +++ b/examples/abnf/Rakefile @@ -0,0 +1,38 @@ +task default: ['abnf.sxp', 'abnf.peg.sxp', "meta.rb", "core.rb", :doc] + +file "meta.rb" => "abnf.ebnf" do |t| + sh %{ + ebnf --peg --format rb \ + --mod-name ABNFMeta \ + --output meta.rb \ + #{t.prerequisites.first} + } +end + +file "core.rb" => "abnf-core.ebnf" do |t| + sh %{ + ebnf --format rb \ + --mod-name ABNFCore \ + --output core.rb \ + #{t.prerequisites.first} + } +end + +file 'abnf.sxp' => "abnf.ebnf" do |t| + sh %{ + ebnf --output abnf.sxp #{t.prerequisites.first} + } +end + +file 'abnf.peg.sxp' => "abnf.ebnf" do |t| + sh %{ + ebnf --peg --output abnf.peg.sxp #{t.prerequisites.first} + } +end + +desc "Generate literal documentation for parser" +task doc: %w(doc/parser.html) + +file "doc/parser.html" => "parser.rb" do + `rocco -t doc/layout.mustache parser.rb 
-o doc` +end diff --git a/examples/abnf/abnf-core.ebnf b/examples/abnf/abnf-core.ebnf new file mode 100644 index 0000000..5856444 --- /dev/null +++ b/examples/abnf/abnf-core.ebnf @@ -0,0 +1,52 @@ +# Core terminals available in uses of ABNF +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z + +BIT ::= '0' | '1' + +CHAR ::= [#x01-#x7F] + # any 7-bit US-ASCII character, + # excluding NUL +CR ::= #x0D + # carriage return + +CRLF ::= CR? LF + # Internet standard newline + +CTL ::= [#x00-#x1F] | #x7F + # controls + +DIGIT ::= [#x30-#x39] + # 0-9 + +DQUOTE ::= #x22 + # " (Double Quote) + +HEXDIG ::= DIGIT | [A-F] + +HTAB ::= #x09 + # horizontal tab + +LF ::= #x0A + # linefeed + +LWSP ::= (WSP | CRLF WSP)* + # Use of this linear-white-space rule + # permits lines containing only white + # space that are no longer legal in + # mail headers and have caused + # interoperability problems in other + # contexts. + # Do not use when defining mail + # headers and use with caution in + # other contexts. + +OCTET ::= [#x00-#xFF] + # 8 bits of data + +SP ::= #x20 + +VCHAR ::= [#x21-#x7E] + # visible (printing) characters + +WSP ::= SP | HTAB + # white space diff --git a/examples/abnf/abnf.abnf b/examples/abnf/abnf.abnf new file mode 100644 index 0000000..9acd3fb --- /dev/null +++ b/examples/abnf/abnf.abnf @@ -0,0 +1,121 @@ +rulelist = 1*( rule / (*c-wsp c-nl) ) + +rule = rulename defined-as elements c-nl + ; continues if next line starts + ; with white space + +rulename = ALPHA *(ALPHA / DIGIT / "-") + +defined-as = *c-wsp ("=" / "=/") *c-wsp + ; basic rules definition and + ; incremental alternatives + +elements = alternation *c-wsp + +c-wsp = WSP / (c-nl WSP) + +c-nl = comment / CRLF + ; comment or newline + +comment = ";" *(WSP / VCHAR) CRLF + +alternation = concatenation + *(*c-wsp "/" *c-wsp concatenation) + +concatenation = repetition *(1*c-wsp repetition) + +repetition = [repeat] element + +repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT + +element = rulename / group / option / + char-val / 
num-val / prose-val + +group = "(" *c-wsp alternation *c-wsp ")" + +option = "[" *c-wsp alternation *c-wsp "]" + +char-val = case-insensitive-string / + case-sensitive-string + +case-insensitive-string = + [ "%i" ] quoted-string + +case-sensitive-string = + "%s" quoted-string + +quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE + ; quoted string of SP and VCHAR + ; without DQUOTE + +num-val = "%" (bin-val / dec-val / hex-val) + +bin-val = "b" 1*BIT + [ 1*("." 1*BIT) / ("-" 1*BIT) ] + ; series of concatenated bit values + ; or single ONEOF range + +dec-val = "d" 1*DIGIT + [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] + +hex-val = "x" 1*HEXDIG + [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] + +prose-val = "<" *(%x20-3D / %x3F-7E) ">" + ; bracketed string of SP and VCHAR + ; without angles + ; prose description, to be used as + ; last resort + +ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + +BIT = "0" / "1" + +CHAR = %x01-7F + ; any 7-bit US-ASCII character, + ; excluding NUL +CR = %x0D + ; carriage return + +CRLF = [CR] LF + ; Internet standard newline + ; Extended to allow only newline + +CTL = %x00-1F / %x7F + ; controls + +DIGIT = %x30-39 + ; 0-9 + +DQUOTE = %x22 + ; " (Double Quote) + +HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + +HTAB = %x09 + ; horizontal tab + +LF = %x0A + ; linefeed + +LWSP = *(WSP / CRLF WSP) + ; Use of this linear-white-space rule + ; permits lines containing only white + ; space that are no longer legal in + ; mail headers and have caused + ; interoperability problems in other + ; contexts. + ; Do not use when defining mail + ; headers and use with caution in + ; other contexts. 
+ +OCTET = %x00-FF + ; 8 bits of data + +SP = %x20 + +VCHAR = %x21-7E + ; visible (printing) characters + +WSP = SP / HTAB + ; white space diff --git a/examples/abnf/abnf.ebnf b/examples/abnf/abnf.ebnf new file mode 100644 index 0000000..2f5ef59 --- /dev/null +++ b/examples/abnf/abnf.ebnf @@ -0,0 +1,124 @@ +rulelist ::= ( rule | (c_wsp* c_nl) )+ + +rule ::= rulename defined_as elements c_nl + # continues if next line starts + # with white space + +elements ::= alternation c_wsp* + +alternation ::= concatenation + (c_wsp* "/" c_wsp* concatenation)* + +concatenation::= repetition (c_wsp+ repetition)* + +repetition ::= repeat? element + +repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+ + +element ::= rulename | group | option | + char_val | num_val | prose_val + +group ::= "(" c_wsp* alternation c_wsp* ")" + +option ::= "[" c_wsp* alternation c_wsp* "]" + +char_val ::= case_insensitive_string | + case_sensitive_string + +case_insensitive_string ::= + "%i"? quoted_string + +case_sensitive_string ::= + "%s" quoted_string + +num_val ::= "%" (bin_val | dec_val | hex_val) + +@terminals + +# Terminals used in ABNF, itself +rulename ::= ALPHA (ALPHA | DIGIT | "-")* + +defined_as ::= c_wsp* ("=" | "=/") c_wsp* + # basic rules definition and + # incremental alternatives + +c_wsp ::= WSP | (c_nl WSP) + +c_nl ::= COMMENT | CRLF + # comment or newline + +comment ::= ";" (WSP | VCHAR)* CRLF + +quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE + # quoted string of SP and VCHAR + # without DQUOTE + +bin_val ::= "b" BIT+ + (("." BIT+)+ | ("-" BIT+))? + # series of concatenated bit values + # or single ONEOF range + +dec_val ::= "d" DIGIT+ + (("." DIGIT+)+ | ("-" DIGIT+))? + +hex_val ::= "x" HEXDIG+ + (("." HEXDIG+)+ | ("-" HEXDIG+))? 
+ +prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" + # bracketed string of SP and VCHAR + # without angles + # prose description, to be used as + # last resort + +# Core terminals available in uses of ABNF +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z + +BIT ::= '0' | '1' + +CHAR ::= [#x01-#x7F] + # any 7-bit US-ASCII character, + # excluding NUL +CR ::= #x0D + # carriage return + +CRLF ::= CR? LF + # Internet standard newline + +CTL ::= [#x00-#x1F] | #x7F + # controls + +DIGIT ::= [#x30-#x39] + # 0-9 + +DQUOTE ::= #x22 + # " (Double Quote) + +HEXDIG ::= DIGIT | [A-F] + +HTAB ::= #x09 + # horizontal tab + +LF ::= #x0A + # linefeed + +LWSP ::= (WSP | CRLF WSP)* + # Use of this linear-white-space rule + # permits lines containing only white + # space that are no longer legal in + # mail headers and have caused + # interoperability problems in other + # contexts. + # Do not use when defining mail + # headers and use with caution in + # other contexts. + +OCTET ::= [#x00-#xFF] + # 8 bits of data + +SP ::= #x20 + +VCHAR ::= [#x21-#x7E] + # visible (printing) characters + +WSP ::= SP | HTAB + # white space diff --git a/examples/abnf/abnf.peg.sxp b/examples/abnf/abnf.peg.sxp new file mode 100644 index 0000000..3373889 --- /dev/null +++ b/examples/abnf/abnf.peg.sxp @@ -0,0 +1,105 @@ +( + (terminal WSP (alt SP HTAB)) + (rule _rulelist_1 (alt rule _rulelist_2)) + (rule _rulelist_2 (seq _rulelist_3 c_nl)) + (rule _rulelist_3 (star c_wsp)) + (rule rule (seq rulename defined_as elements c_nl)) + (rule elements (seq alternation _elements_1)) + (rule _elements_1 (star c_wsp)) + (rule alternation (seq concatenation _alternation_1)) + (rule _alternation_1 (star _alternation_2)) + (rule _alternation_2 (seq _alternation_3 "/" _alternation_4 concatenation)) + (rule _alternation_3 (star c_wsp)) + (rule _alternation_4 (star c_wsp)) + (rule concatenation (seq repetition _concatenation_1)) + (rule _concatenation_1 (star _concatenation_2)) + (rule _concatenation_2 (seq _concatenation_3 
repetition)) + (rule _concatenation_3 (plus c_wsp)) + (rule repetition (seq _repetition_1 element)) + (rule _repetition_1 (opt repeat)) + (rule repeat (alt _repeat_1 _repeat_2)) + (rule _repeat_1 (seq _repeat_3 "*" _repeat_4)) + (rule _repeat_3 (star DIGIT)) + (rule _repeat_4 (star DIGIT)) + (rule _repeat_2 (plus DIGIT)) + (rule element (alt rulename group option char_val num_val prose_val)) + (rule group (seq "(" _group_1 alternation _group_2 ")")) + (rule _group_1 (star c_wsp)) + (rule _group_2 (star c_wsp)) + (rule option (seq "[" _option_1 alternation _option_2 "]")) + (rule _option_1 (star c_wsp)) + (rule _option_2 (star c_wsp)) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (rule case_insensitive_string (seq _case_insensitive_string_1 quoted_string)) + (rule _case_insensitive_string_1 (opt "%i")) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule num_val (seq "%" _num_val_1)) + (rule _num_val_1 (alt bin_val dec_val hex_val)) + (terminal rulename (seq ALPHA _rulename_1)) + (rule _rulename_1 (star _rulename_2)) + (rule _rulename_2 (alt ALPHA DIGIT "-")) + (terminal defined_as (seq _defined_as_1 _defined_as_2 _defined_as_3)) + (rule _defined_as_1 (star c_wsp)) + (rule _defined_as_2 (alt "=" "=/")) + (rule _defined_as_3 (star c_wsp)) + (terminal c_wsp (alt WSP _c_wsp_1)) + (rule _c_wsp_1 (seq c_nl WSP)) + (terminal c_nl (alt COMMENT CRLF)) + (terminal comment (seq ";" _comment_1 CRLF)) + (rule _comment_1 (star _comment_2)) + (rule _comment_2 (alt WSP VCHAR)) + (terminal quoted_string (seq DQUOTE _quoted_string_1 DQUOTE)) + (rule _quoted_string_1 (star _quoted_string_2)) + (terminal _quoted_string_2 (range "#x20-#x21#x23-#x7E")) + (terminal bin_val (seq "b" _bin_val_1 _bin_val_2)) + (rule _bin_val_1 (plus BIT)) + (rule _bin_val_2 (opt _bin_val_3)) + (rule _bin_val_3 (alt _bin_val_4 _bin_val_5)) + (rule _bin_val_4 (plus _bin_val_6)) + (rule _bin_val_6 (seq "." 
_bin_val_7)) + (rule _bin_val_7 (plus BIT)) + (rule _bin_val_5 (seq "-" _bin_val_8)) + (rule _bin_val_8 (plus BIT)) + (terminal dec_val (seq "d" _dec_val_1 _dec_val_2)) + (rule _dec_val_1 (plus DIGIT)) + (rule _dec_val_2 (opt _dec_val_3)) + (rule _dec_val_3 (alt _dec_val_4 _dec_val_5)) + (rule _dec_val_4 (plus _dec_val_6)) + (rule _dec_val_6 (seq "." _dec_val_7)) + (rule _dec_val_7 (plus DIGIT)) + (rule _dec_val_5 (seq "-" _dec_val_8)) + (rule _dec_val_8 (plus DIGIT)) + (terminal hex_val (seq "x" _hex_val_1 _hex_val_2)) + (rule _hex_val_1 (plus HEXDIG)) + (rule _hex_val_2 (opt _hex_val_3)) + (rule _hex_val_3 (alt _hex_val_4 _hex_val_5)) + (rule _hex_val_4 (plus _hex_val_6)) + (rule _hex_val_6 (seq "." _hex_val_7)) + (rule _hex_val_7 (plus HEXDIG)) + (rule _hex_val_5 (seq "-" _hex_val_8)) + (rule _hex_val_8 (plus HEXDIG)) + (terminal prose_val (seq "<" _prose_val_1 ">")) + (rule _prose_val_1 (star _prose_val_2)) + (terminal _prose_val_2 (range "#x20-#x3D#x3F-#x7E")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (terminal BIT (alt "0" "1")) + (terminal CHAR (range "#x01-#x7F")) + (terminal CR (hex "#x0D")) + (terminal CRLF (seq _CRLF_1 LF)) + (terminal _CRLF_1 (opt CR)) + (terminal CTL (alt _CTL_1 _CTL_2)) + (terminal _CTL_1 (range "#x00-#x1F")) + (terminal _CTL_2 (hex "#x7F")) + (terminal DIGIT (range "#x30-#x39")) + (terminal DQUOTE (hex "#x22")) + (terminal HEXDIG (alt DIGIT _HEXDIG_1)) + (terminal _HEXDIG_1 (range "A-F")) + (terminal HTAB (hex "#x09")) + (terminal LF (hex "#x0A")) + (terminal LWSP (star _LWSP_1)) + (terminal _LWSP_1 (alt WSP _LWSP_2)) + (terminal _LWSP_2 (seq CRLF WSP)) + (terminal OCTET (range "#x00-#xFF")) + (terminal SP (hex "#x20")) + (terminal VCHAR (range "#x21-#x7E")) + (rule rulelist (plus _rulelist_1))) diff --git a/examples/abnf/abnf.sxp b/examples/abnf/abnf.sxp new file mode 100644 index 0000000..4b96a3e --- /dev/null +++ b/examples/abnf/abnf.sxp @@ -0,0 +1,44 @@ +( + (terminal WSP (alt SP HTAB)) + (rule rule (seq rulename 
defined_as elements c_nl)) + (rule elements (seq alternation (star c_wsp))) + (rule alternation + (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) + (rule repetition (seq (opt repeat) element)) + (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) + (rule element (alt rulename group option char_val num_val prose_val)) + (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) + (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (rule case_insensitive_string (seq (opt "%i") quoted_string)) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) + (terminal c_wsp (alt WSP (seq c_nl WSP))) + (terminal c_nl (alt COMMENT CRLF)) + (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) + (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (terminal dec_val + (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) + (terminal hex_val + (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) + (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (terminal BIT (alt "0" "1")) + (terminal CHAR (range "#x01-#x7F")) + (terminal CR (hex "#x0D")) + (terminal CRLF (seq (opt CR) LF)) + (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) + (terminal DIGIT (range "#x30-#x39")) + (terminal DQUOTE (hex "#x22")) + (terminal HEXDIG (alt DIGIT (range "A-F"))) + (terminal HTAB (hex "#x09")) + (terminal LF (hex "#x0A")) + (terminal LWSP (star (alt WSP (seq CRLF WSP)))) + (terminal OCTET (range "#x00-#xFF")) + (terminal SP (hex "#x20")) + (terminal VCHAR (range "#x21-#x7E")) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))) diff --git a/examples/abnf/core.rb b/examples/abnf/core.rb new file mode 100644 index 0000000..d4e73d0 --- /dev/null +++ b/examples/abnf/core.rb @@ -0,0 +1,23 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from abnf-core.ebnf +module ABNFCore + RULES = [ + EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), + EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal), + EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal), + EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal), + EBNF::Rule.new(:CRLF, nil, [:seq, [:opt, :CR], :LF], kind: :terminal), + EBNF::Rule.new(:CTL, nil, [:alt, [:range, "#x00-#x1F"], [:hex, "#x7F"]], kind: :terminal), + EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal), + EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, [:range, "A-F"]], kind: :terminal), + EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal), + EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal), + EBNF::Rule.new(:LWSP, nil, [:star, [:alt, :WSP, [:seq, :CRLF, :WSP]]], kind: :terminal), + EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal), + 
EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal), + EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal), + EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal), + ] +end + diff --git a/examples/abnf/doc/layout.mustache b/examples/abnf/doc/layout.mustache new file mode 100644 index 0000000..c62137d --- /dev/null +++ b/examples/abnf/doc/layout.mustache @@ -0,0 +1,491 @@ + + + + + {{ title }} + + + +
+
+ {{#sources?}} +
+ Jump To … +
+
+ {{#sources}} + {{ basename }} + {{/sources}} +
+
+
+ {{/sources?}} + + + + + + + + + {{#sections}} + + + + + {{/sections}} +

{{ title }}

+
+ +
+ {{{ docs }}} +
+
{{{ code }}}
+
+
+ diff --git a/examples/abnf/doc/parser.html b/examples/abnf/doc/parser.html new file mode 100644 index 0000000..5c26cae --- /dev/null +++ b/examples/abnf/doc/parser.html @@ -0,0 +1,1155 @@ + + + + + parser.rb + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

parser.rb

+
+ +
+

ABNF Parser for EBNF.

+ +

Produces an Abstract Syntax Tree in S-Expression form for the input grammar file

+
+
require 'ebnf'
+require 'ebnf/terminals'
+require 'ebnf/peg/parser'
+require 'core'    # "Core" rules used in the ABNF spec.
+require 'meta'    # "ABNF" rules used for parsing ABNF, itself
+require 'scanf'
+require 'logger'
+
+class ABNFParser
+  include EBNF::PEG::Parser
+
+
+ +
+

Regular expressions for both "Core" and ABNF-specific terminals.

+
+
  ALPHA = %r{[\x41-\x5A\x61-\x7A]}
+  VCHAR = %r{[\x20-\x7E]}
+  WSP = %r{[\x20\x09]}
+  CRLF = %r{\x0D?\x0A}
+  COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
+  C_NL = %r{#{COMMENT}|#{CRLF}}
+  C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}
+
+
+ +
+

Hash of generated {EBNF::Rule} objects by symbol

+ +

@return [Hash{Symbol => EBNF::Rule}]

+
+
  attr_reader :parsed_rules
+
+
+ +
+

The following ABNF grammar rules are treated as terminals.

+
+
+
+
+ +
+

rulename ::= ALPHA (ALPHA | DIGIT | "-")*

+
+
  terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
+    value.to_sym
+  end
+
+
+ +
+

defined_as ::= c_wsp* ("=" | "=/") c_wsp*

+
+
  terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}
+
+
+ +
+

quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE

+
+
  terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
+    value[1..-2]
+  end
+
+
+ +
+

bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?

+
+
  terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
+    if value.include?('.')
+
+
+ +
+

Interpret segments in binary creating a string

+
+
      value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("")
+    elsif value.include?('-')
+
+
+ +
+

Interpret as a range

+
+
      [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")]
+    else
+
+
+ +
+

Interpret as a single HEX character

+
+
      [:hex, "#x%x" % value[1..-1].to_i(base=2)]
+    end
+  end
+
+
+ +
+

dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?

+
+
  terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
+    if value.include?('.')
+
+
+ +
+

Interpret segments in decimal creating a string

+
+
      value[1..-1].split('.').map {|d| d.to_i.chr}.join("")
+    elsif value.include?('-')
+
+
+ +
+

Interpret as a range

+
+
      [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")]
+    else
+
+
+ +
+

Interpret as a single HEX character

+
+
      [:hex, "#x%x" % value[1..-1].to_i]
+    end
+  end
+
+
+ +
+

hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?

+
+
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value|
+    if value.include?('.')
+
+
+ +
+

Interpret segments in hexadecimal creating a string

+
+
      value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("")
+    elsif value.include?('-')
+
+
+ +
+

Interpret as a range

+
+
      [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")]
+    else
+
+
+ +
+

Interpret as a single HEX character

+
+
      [:hex, "#x#{value[1..-1]}"]
+    end
+  end
+
+
+ +
+

c_wsp ::= WSP | (c_nl WSP)

+
+
  terminal(:c_wsp, C_WSP)
+
+
+ +
+

c_nl ::= comment | CRLF

+
+
  terminal(:c_nl, C_NL)
+
+
+ +
+

DIGIT ::= [#x30-#x39]

+
+
  terminal(:DIGIT, /\d/)
+
+
+ +
+

Non-terminal productions

+
+
+
+
+ +
+

The start_production on :rule allows the parser to present the value as a single Hash, rather than an array of individual hashes.

+
+
  start_production(:rule, as_hash: true)
+
+
+ +
+

rule ::= rulename defined_as elements c_nl

+
+
  production(:rule) do |value|
+
+
+ +
+

value contains an expression. +Invoke callback

+
+
    sym = value[:rulename]
+    elements = value[:elements]
+
+    if value[:defined_as] == "=/"
+
+
+ +
+

append to rule alternate

+
+
      rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
+      rule.expr = [:alt, rule.expr] unless rule.alt?
+      if elements.first == :alt
+
+
+ +
+

append alternatives to rule

+
+
        rule.expr.concat(elements[1..-1])
+      else
+
+
+ +
+

add elements as last alternative

+
+
        rule.expr.push(elements)
+      end
+    else
+
+
+ +
+

There shouldn't be an existing rule

+
+
      raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
+      parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
+    end
+    sym
+  end
+
+
+ +
+

elements ::= alternation c_wsp*

+
+
  production(:elements) do |value|
+    value.first[:alternation]
+  end
+
+
+ +
+

alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*

+
+
  production(:alternation) do |value|
+    unless value.last[:_alternation_1].empty?
+      [:alt, value.first[:concatenation]] + value.last[:_alternation_1]
+    else
+      value.first[:concatenation]
+    end
+  end
+
+
+ +
+

The _alternation_2 rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the alternation rule.

+
+
  production(:_alternation_2) do |value|
+    if Array(value.last[:concatenation]).first == :alt
+      value.last[:concatenation][1..-1]
+    else
+      [value.last[:concatenation]]
+    end
+    value.last[:concatenation]
+  end
+
+
+ +
+

concatenation::= repetition (c_wsp+ repetition)*

+
+
  production(:concatenation) do |value|
+    unless value.last[:_concatenation_1].empty?
+      [:seq, value.first[:repetition]] + value.last[:_concatenation_1]
+    else
+      value.first[:repetition]
+    end
+  end
+  start_production(:_concatenation_2, as_hash: true)
+  production(:_concatenation_2) do |value|
+    value[:repetition]
+  end
+
+
+ +
+

repetition ::= repeat? element

+
+
  production(:repetition) do |value|
+    rept = value.first[:_repetition_1]
+    elt = value.last[:element]
+    case rept
+    when [0, '*'] then [:star, elt]
+    when [1, '*'] then [:plus, elt]
+    when nil      then elt
+    else
+      [:rept, rept.first, rept.last, elt]
+    end
+  end
+
+
+ +
+

repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+

+
+
  production(:repeat) do |value|
+    if value.is_a?(Integer)
+      [value, value]
+    else
+      [value.first, value.last]
+    end
+  end
+  start_production(:_repeat_1, as_hash: true)
+  production(:_repeat_1) {|value| value.values}
+  production(:_repeat_2) {|value| value.join("").to_i}
+  production(:_repeat_3) {|value| value.join("").to_i}
+  production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}
+
+
+ +
+

element ::= rulename | group | option | char_val | num_val | prose_val

+
+
  production(:element) do |value|
+    value
+  end
+
+
+ +
+

group ::= "(" c_wsp* alternation c_wsp* ")"

+
+
  start_production(:group, as_hash: true)
+  production(:group) do |value|
+    value[:alternation]
+  end
+
+
+ +
+

option ::= "[" c_wsp* alternation c_wsp* "]"

+
+
  start_production(:option, as_hash: true)
+  production(:option) do |value|
+    [:opt, value[:alternation]]
+  end
+
+
+ +
+

char_val ::= case_insensitive_string | case_sensitive_string

+
+
  production(:char_val) do |value|
+
+
+ +
+

FIXME: need rule logic for insensitive matching of strings

+
+
    value.last[:quoted_string]
+  end
+
+
+ +
+

num_val ::= "%" (bin_val | dec_val | hex_val)

+
+
  production(:num_val) do |value|
+    value.last[:_num_val_1]
+  end
+
+
+ +
+

Parser invocation.

+ +

On start, yield ourselves if a block is given, otherwise, return this parser instance

+ +

@param [#read, #to_s] input +@param [Hash{Symbol => Object}] options +@option options [Integer] :level + Trace level. 0(debug), 1(info), 2(warn), 3(error). +@return [ABNFParser]

+
+
  def initialize(input, **options, &block)
+
+
+ +
+

If the level option is set, instantiate a logger for collecting trace information.

+
+
    if options.has_key?(:level)
+      options[:logger] = Logger.new(STDERR)
+      options[:logger].level = options[:level]
+      options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
+    end
+
+
+ +
+

Read input, if necessary, which will be used in a Scanner.

+
+
    @input = input.respond_to?(:read) ? input.read : input.to_s
+
+    @parsed_rules = {}
+
+
+ +
+

Parses into @parsed_rules

+
+
    parse(@input,
+          :rulelist,        # Starting rule
+          ABNFMeta::RULES,  # PEG rules
+          whitespace: '',   # No implicit whitespace
+          **options)
+  end
+
+
+ +
+

The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.

+ +

@return [Array<EBNF::Rule>]

+
+
  def ast
+
+
+ +
+

Add built-in rules for standard ABNF rules not already defined by the parsed grammar.

+
+
    parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
+      rule = ABNFCore::RULES.detect {|r| r.sym == sym}
+      parsed_rules[sym] ||= rule
+    end
+
+    parsed_rules.values
+  end
+
+
+ +
+

Output formatted S-Expression of grammar

+
+
  def to_sxp
+    require 'sxp' unless defined?(SXP)
+
+
+ +
+

Output rules as a formatted S-Expression

+ +
+
    SXP::Generator.string(ast.map(&:for_sxp))
+  end
+end
+
+
+ diff --git a/examples/abnf/examples/1star.abnf b/examples/abnf/examples/1star.abnf new file mode 100644 index 0000000..06d5780 --- /dev/null +++ b/examples/abnf/examples/1star.abnf @@ -0,0 +1 @@ +suffix = 1*("I" / "V" / "X") diff --git a/examples/abnf/examples/postal-address.abnf b/examples/abnf/examples/postal-address.abnf index 4b1e969..dd99bf0 100644 --- a/examples/abnf/examples/postal-address.abnf +++ b/examples/abnf/examples/postal-address.abnf @@ -17,4 +17,4 @@ street-name = 1*VCHAR zip-part = town-name "," SP state 1*2SP zip-code CRLF town-name = 1*(ALPHA / SP) state = 2ALPHA -zip-code = 5DIGIT ["-" 4DIGIT] \ No newline at end of file +zip-code = 5DIGIT ["-" 4DIGIT] diff --git a/examples/abnf/meta.rb b/examples/abnf/meta.rb new file mode 100644 index 0000000..b5ce638 --- /dev/null +++ b/examples/abnf/meta.rb @@ -0,0 +1,111 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from abnf.ebnf +module ABNFMeta + RULES = [ + EBNF::Rule.new(:rulelist, nil, [:plus, :_rulelist_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_1, nil, [:alt, :rule, :_rulelist_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_2, nil, [:seq, :_rulelist_3, :c_nl]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:rule, nil, [:seq, :rulename, :defined_as, :elements, :c_nl]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:elements, nil, [:seq, :alternation, :_elements_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_elements_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:alternation, nil, [:seq, :concatenation, :_alternation_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_1, nil, [:star, :_alternation_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_2, nil, [:seq, :_alternation_3, "/", :_alternation_4, :concatenation]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_alternation_4, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenation, nil, [:seq, :repetition, :_concatenation_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_1, nil, [:star, :_concatenation_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_2, nil, [:seq, :_concatenation_3, :repetition]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_3, nil, [:plus, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repetition, nil, [:seq, :_repetition_1, :element]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repetition_1, nil, [:opt, :repeat]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repeat, nil, [:alt, :_repeat_1, :_repeat_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_1, nil, [:seq, :_repeat_3, "*", :_repeat_4]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_3, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_4, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_2, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:element, nil, [:alt, :rulename, :group, :option, :char_val, :num_val, :prose_val]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:group, nil, [:seq, "(", :_group_1, :alternation, :_group_2, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_group_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_group_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:option, nil, [:seq, "[", :_option_1, :alternation, :_option_2, "]"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_option_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_option_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:char_val, nil, [:alt, :case_insensitive_string, :case_sensitive_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:case_insensitive_string, nil, [:seq, :_case_insensitive_string_1, :quoted_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_case_insensitive_string_1, nil, [:opt, 
"%i"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:case_sensitive_string, nil, [:seq, "%s", :quoted_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:num_val, nil, [:seq, "%", :_num_val_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_num_val_1, nil, [:alt, :bin_val, :dec_val, :hex_val]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:rulename, nil, [:seq, :ALPHA, :_rulename_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulename_1, nil, [:star, :_rulename_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulename_2, nil, [:alt, :ALPHA, :DIGIT, "-"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defined_as, nil, [:seq, :_defined_as_1, :_defined_as_2, :_defined_as_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_2, nil, [:alt, "=", "=/"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:c_wsp, nil, [:alt, :WSP, :_c_wsp_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_c_wsp_1, nil, [:seq, :c_nl, :WSP]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:c_nl, nil, [:alt, :COMMENT, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment, nil, [:seq, ";", :_comment_1, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_1, nil, [:star, :_comment_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_2, nil, [:alt, :WSP, :VCHAR]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:quoted_string, nil, [:seq, :DQUOTE, :_quoted_string_1, :DQUOTE], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_1, nil, [:star, :_quoted_string_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_2, nil, [:range, "#x20-#x21#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:bin_val, nil, [:seq, "b", :_bin_val_1, :_bin_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_1, nil, 
[:plus, :BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_2, nil, [:opt, :_bin_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_3, nil, [:alt, :_bin_val_4, :_bin_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_4, nil, [:plus, :_bin_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_6, nil, [:seq, ".", :_bin_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_7, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_5, nil, [:seq, "-", :_bin_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_8, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:dec_val, nil, [:seq, "d", :_dec_val_1, :_dec_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_1, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_2, nil, [:opt, :_dec_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_3, nil, [:alt, :_dec_val_4, :_dec_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_4, nil, [:plus, :_dec_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_6, nil, [:seq, ".", :_dec_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_7, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_5, nil, [:seq, "-", :_dec_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_8, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:hex_val, nil, [:seq, "x", :_hex_val_1, :_hex_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_1, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_2, nil, [:opt, :_hex_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_3, nil, [:alt, :_hex_val_4, :_hex_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_4, nil, [:plus, :_hex_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_6, nil, [:seq, ".", :_hex_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_7, nil, [:plus, 
:HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_5, nil, [:seq, "-", :_hex_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_8, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:prose_val, nil, [:seq, "<", :_prose_val_1, ">"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_1, nil, [:star, :_prose_val_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_2, nil, [:range, "#x20-#x3D#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CRLF, nil, [:seq, :_CRLF_1, :LF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CRLF_1, nil, [:opt, :CR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CTL, nil, [:alt, :_CTL_1, :_CTL_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CTL_1, nil, [:range, "#x00-#x1F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_LWSP_1, nil, [:alt, :WSP, :_LWSP_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LWSP_2, nil, [:seq, :CRLF, :WSP], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal).extend(EBNF::PEG::Rule), + ] +end + diff --git a/examples/abnf/parse b/examples/abnf/parse new file mode 100755 index 0000000..04905c0 --- /dev/null +++ b/examples/abnf/parse @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby +# parse --- Process EBNF to generate AST S-Expression + +$:.unshift(File.expand_path("../../../lib", __FILE__)) +$:.unshift(File.expand_path("..", __FILE__)) +require 'rubygems' +require 'getoptlong' +require 'parser' +require 'sxp' + +out = STDOUT + +OPT_ARGS = [ + ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT, "Evaluate argument"], + ["--trace", GetoptLong::OPTIONAL_ARGUMENT, "Trace output level (0-3)"], + ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] +] +def usage + require 'ebnf' + STDERR.puts %{#{$0} Version #{EBNF::VERSION}} + STDERR.puts %{Usage: #{$0} [options] file ...} + width = OPT_ARGS.map do |o| + l = o.first.length + l += o[1].length + 2 if o[1].is_a?(String) + l + end.max + OPT_ARGS.each do |o| + s = " %-*s " % [width, (o[1].is_a?(String) ? 
"#{o[0,2].join(', ')}" : o[0])] + s += o.last + STDERR.puts s + end + exit(1) +end + +options = {} +input = nil + +opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) + +opts.each do |opt, arg| + case opt + when '--evaluate' then input = arg + when '--trace' then options[:level] = arg.to_i + when '--help' then usage + end +end + +input = File.open(ARGV[0]) if ARGV[0] + +# Collect rules +ebnf = ABNFParser.new(input || STDIN, **options) + +puts ebnf.to_sxp diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb new file mode 100644 index 0000000..d1a83ea --- /dev/null +++ b/examples/abnf/parser.rb @@ -0,0 +1,269 @@ +# # EBNF Parser for EBNF. +# +# Produces an Abstract Synatx Tree in S-Expression form for the input grammar file +require 'ebnf' +require 'ebnf/terminals' +require 'ebnf/peg/parser' +require 'core' # "Core" rules used in the ABNF spec. +require 'meta' # "ABNF" rules used for parsing ABNF, itself +require 'scanf' +require 'logger' + +class ABNFParser + include EBNF::PEG::Parser + + # Regular expressions for both "Core" and ABNF-specific terminals. + ALPHA = %r{[\x41-\x5A\x61-\x7A]} + VCHAR = %r{[\x20-\x7E]} + WSP = %r{[\x20\x09]} + CRLF = %r{\x0D?\x0A} + COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}} + C_NL = %r{#{COMMENT}|#{CRLF}} + C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})} + + ## + # Hash of generated {EBNF::Rule} objects by symbol + # + # @return [Hash{Symbol => EBNF::Rule}] + attr_reader :parsed_rules + + ## + # The following ABNF grammar rules are treated as terminals. + + # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*` + terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value| + value.to_sym + end + + # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*` + terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip} + + # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE` + terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value| + value[1..-2] + end + + # `bin_val ::= "b" BIT+ (("." 
BIT+)+ | ("-" BIT+))?` + terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value| + if value.include?('.') + # Interpret segments in binary creating a string + value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x%x" % value[1..-1].to_i(base=2)] + end + end + + # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?` + terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value| + if value.include?('.') + # Interpret segments in decimal creating a string + value[1..-1].split('.').map {|d| d.to_i.chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x%x" % value[1..-1].to_i] + end + end + + # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?` + terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value| + if value.include?('.') + # Interpret segments in hexadecimal creating a string + value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x#{value[1..-1]}"] + end + end + + # `c_wsp ::= WSP | (c_nl WSP)` + terminal(:c_wsp, C_WSP) + + # `c_nl ::= comment | CRLF` + terminal(:c_nl, C_NL) + + # `DIGIT ::= [#x30-#x39]` + terminal(:DIGIT, /\d/) + + # ## Non-terminal productions + + # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes. + start_production(:rule, as_hash: true) + + # `rule ::= rulename defined_as elements c_nl` + production(:rule) do |value| + # value contains an expression. 
+ # Invoke callback + sym = value[:rulename] + elements = value[:elements] + + if value[:defined_as] == "=/" + # append to rule alternate + rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"} + rule.expr = [:alt, rule.expr] unless rule.alt? + if elements.first == :alt + # append alternatives to rule + rule.expr.concat(elements[1..-1]) + else + # add elements as last alternative + rule.expr.push(elements) + end + else + # There shouldn't be an existing rule + raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym) + parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements) + end + sym + end + + # `elements ::= alternation c_wsp*` + production(:elements) do |value| + value.first[:alternation] + end + + # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*` + production(:alternation) do |value| + unless value.last[:_alternation_1].empty? + [:alt, value.first[:concatenation]] + value.last[:_alternation_1] + else + value.first[:concatenation] + end + end + + # The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule. + production(:_alternation_2) do |value| + if Array(value.last[:concatenation]).first == :alt + value.last[:concatenation][1..-1] + else + [value.last[:concatenation]] + end + value.last[:concatenation] + end + + # `concatenation::= repetition (c_wsp+ repetition)*` + production(:concatenation) do |value| + unless value.last[:_concatenation_1].empty? + [:seq, value.first[:repetition]] + value.last[:_concatenation_1] + else + value.first[:repetition] + end + end + start_production(:_concatenation_2, as_hash: true) + production(:_concatenation_2) do |value| + value[:repetition] + end + + # `repetition ::= repeat?
element` + production(:repetition) do |value| + rept = value.first[:_repetition_1] + elt = value.last[:element] + case rept + when [0, '*'] then [:star, elt] + when [1, '*'] then [:plus, elt] + when nil then elt + else + [:rept, rept.first, rept.last, elt] + end + end + + # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)` + production(:repeat) do |value| + if value.is_a?(Integer) + [value, value] + else + [value.first, value.last] + end + end + start_production(:_repeat_1, as_hash: true) + production(:_repeat_1) {|value| value.values} + production(:_repeat_2) {|value| value.join("").to_i} + production(:_repeat_3) {|value| value.join("").to_i} + production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'} + + # `element ::= rulename | group | option | char_val | num_val | prose_val` + production(:element) do |value| + value + end + + # `group ::= "(" c_wsp* alternation c_wsp* ")"` + start_production(:group, as_hash: true) + production(:group) do |value| + value[:alternation] + end + + # `option ::= "[" c_wsp* alternation c_wsp* "]"` + start_production(:option, as_hash: true) + production(:option) do |value| + [:opt, value[:alternation]] + end + + # `char_val ::= case_insensitive_string | case_sensitive_string` + production(:char_val) do |value| + # FIXME: need rule logic for insensitive matching of strings + value.last[:quoted_string] + end + + # `num_val ::= "%" (bin_val | dec_val | hex_val)` + production(:num_val) do |value| + value.last[:_num_val_1] + end + + # ## Parser invocation. + # On start, yield ourselves if a block is given, otherwise, return this parser instance + # + # @param [#read, #to_s] input + # @param [Hash{Symbol => Object}] options + # @option options [Integer] :level + # Trace level. 0(debug), 1(info), 2(warn), 3(error). + # @return [ABNFParser] + def initialize(input, **options, &block) + # If the `level` option is set, instantiate a logger for collecting trace information.
+ if options.has_key?(:level) + options[:logger] = Logger.new(STDERR) + options[:logger].level = options[:level] + options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"} + end + + # Read input, if necessary, which will be used in a Scanner. + @input = input.respond_to?(:read) ? input.read : input.to_s + + @parsed_rules = {} + + # Parses into `@parsed_rules` + parse(@input, + :rulelist, # Starting rule + ABNFMeta::RULES, # PEG rules + whitespace: '', # No implicit whitespace + **options) + end + + ## + # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar. + # + # @return [Array] + def ast + # Add built-in rules for standard ABNF rules not already defined by the parsed grammar + parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym| + rule = ABNFCore::RULES.detect {|r| r.sym == sym} + parsed_rules[sym] ||= rule + end + + parsed_rules.values + end + + # Output formatted S-Expression of grammar + def to_sxp + require 'sxp' unless defined?(SXP) + # Output rules as a formatted S-Expression + SXP::Generator.string(ast.map(&:for_sxp)) + end +end diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index c9f76e2..e30dcd7 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -1,6 +1,6 @@ # EBNF Parser example -This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract S-Expression which can be used to generate parser tables input grammars. Effectively, this is a re-implementation of {EBNF::Parser} itself. +This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract [S-Expression][] which can be used to generate parser tables input grammars. Effectively, this is a re-implementation of {EBNF::Parser} itself. ## Parsing the Grammar @@ -8,11 +8,11 @@ This example implements an [EBNF][] parser equivalent to the built-in parser.
Th ebnf = EBNFLL1Parser.new(File.open("../../etc/ebnf.ebnf")) -Output rules and terminals as S-Expressions, Turtle or EBNF +Output rules and terminals as [S-Expressions][S-Expression], [Turtle][] or [EBNF][] puts ebnf.to_sxp -This generates a S-Expression form of the grammar suitable for use by {EBNF} for generating a BNF representation (avoiding `star`, `plus`, and `opt` expressions), LL(1) first/follow comprehensions and branch tables used for parsing input files based on the grammar. +This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} for generating a [BNF][] representation (avoiding `star`, `plus`, and `opt` expressions), [LL(1)][] [First/Follow][] comprehensions and branch tables used for parsing input files based on the grammar. ( (pass _pass (seq PASS)) @@ -53,12 +53,12 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF} for (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) -This can then be used as input to {EBNF.parse} to transform EBNF to BNF, create LL(1) first/follow rules and/or generate parser tables for parsing examples of the grammar using {EBNF::LL1::Parser}. +This can then be used as input to {EBNF.parse} to transform [EBNF][] to [BNF][], create [LL(1)][] [First/Follow][] rules and/or generate parser tables for parsing examples of the grammar using {EBNF::LL1::Parser}. 
ebnf --input-format sxp --bnf ebnf.sxp ebnf --input-format sxp --ll1 ebnf --format rb ebnf.sxp -An example S-Expression for rule `ebnf`, which uses both `start` and `alt` operators is transformed to use just BNF `alt` and `seq` operators, and include `first` and `follow` sets is shown here: +An example [S-Expression][] for rule `ebnf`, which uses both `start` and `alt` operators is transformed to use just BNF `alt` and `seq` operators, and include `first` and `follow` sets is shown here: (rule ebnf "1" (start #t) @@ -76,9 +76,9 @@ Note that sub-productions `_ebnf_1` through `_ebnf_3` are created, could be usef ## Example Walkthrough -This example uses the EBNF grammar from {file:/etc/ebnf.ebnf} to generate {file:meta}, which include the resulting `BRANCH`, `FIRST`, `FOLLOW`, `TERMINALS` and `PASS` tables, used by {file:parser} to implement a parser for the grammar. +This example uses the [EBNF][] grammar from {file:/etc/ebnf.ebnf} to generate {file:meta}, which include the resulting `BRANCH`, `FIRST`, `FOLLOW`, `TERMINALS` and `PASS` tables, used by {file:parser} to implement a parser for the grammar. -The first step is defining regular expressions for terminals used within the grammar. The table generation process in {EBNF::LL1#build_tables} is not yet capable of automatically generating regular expressions for terminal productions, so they must be defined by hand. For the EBNF grammar, this is done in {EBNF::Terminals}. +The first step is defining regular expressions for terminals used within the grammar. The table generation process in {EBNF::LL1#build_tables} is not yet capable of automatically generating regular expressions for terminal productions, so they must be defined by hand. For the [EBNF][] grammar, this is done in {EBNF::Terminals}. The {file:parser} is implemented using the {EBNFLL1Parser} class, which includes {EBNF::LL1::Parser} and {EBNFParserMeta}. @@ -119,11 +119,11 @@ This is associated with the '|' part of the `alt` production. 
[5] alt ::= seq ('|' seq)* -When this is invoked, we have already processed one `seq`, which is placed on the `prod_data` stack, as `input[:seq]`. The result is to remove the `seq` data and append it to the `alt` data in `input[:alt]`. The final result of `alt`, will then be the hash containing :alt and an array of data matching the `seq` sub-productions. Looking at the EBNF grammar itself, we can see that the first declaration is +When this is invoked, we have already processed one `seq`, which is placed on the `prod_data` stack, as `input[:seq]`. The result is to remove the `seq` data and append it to the `alt` data in `input[:alt]`. The final result of `alt`, will then be the hash containing :alt and an array of data matching the `seq` sub-productions. Looking at the [EBNF][] grammar itself, we can see that the first declaration is [1] ebnf ::= (declaration | rule)* -This is reduced to the LL(1) S-Expression noted above: +This is reduced to the LL(1) [S-Expression][] noted above: (rule ebnf "1" (start #t) @@ -137,16 +137,12 @@ This is reduced to the LL(1) S-Expression noted above: (rule _ebnf_2 "1.2" (first "@pass" "@terminals" LHS) (follow _eof) (seq _ebnf_1 ebnf)) (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf)) -The `ebnf` production uses the `alt` operator. When matching the production itself we can see that it is either a `declaration` or a `rule`. In this case of this parser, the result of parsing EBNF is an Abstract Syntax Tree, but in other cases it may create something else. In the case of the [Turtle gem][], the parser generates _RDF Triples_. Because the parser uses a streaming lexer, a file of any length can be passed to the parser, which emits triples as sufficient processing completes. +The `ebnf` production uses the `alt` operator. When matching the production itself we can see that it is either a `declaration` or a `rule`. 
In this case of this parser, the result of parsing [EBNF][] is an Abstract Syntax Tree, but in other cases it may create something else. In the case of the [Turtle gem][], the parser generates _RDF Triples_. Because the parser uses a streaming lexer, a file of any length can be passed to the parser, which emits triples as sufficient processing completes. -[Ruby]: https://ruby-lang.org/ -[YARD]: https://yardoc.org/ -[YARD-GS]: https://rubydoc.info/docs/yard/file/docs/GettingStarted.md -[PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html +[BNF]: https://en.wikipedia.org/wiki/Backus–Naur_form [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation -[EBNF doc]: https://rubydoc.info/github/dryruby/ebnf/ [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table [LL(1)]: https://www.csd.uwo.ca/~moreno//CS447/Lectures/Syntax.html/node14.html -[LL(1) Parser]: https://en.wikipedia.org/wiki/LL_parser -[Tokenizer]: https://en.wikipedia.org/wiki/Lexical_analysis#Tokenizer +[S-expression]: https://en.wikipedia.org/wiki/S-expression +[Turtle]: https://www.w3.org/TR/2012/WD-turtle-20120710/ [Turtle gem]: https://rubygems.org/gems/rdf-turtle diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index efa116c..a51ccb3 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -1,6 +1,6 @@ # EBNF Parser example -This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract S-Expression composed of sub-rules which can be directly executed by the parser. Effectively, this is a re-implementation of {EBNF::Parser} itself. +This example implements an [EBNF][] parser equivalent to the built-in parser. The proximate result is an Abstract [S-Expression][] composed of sub-rules which can be directly executed by the parser. Effectively, this is a re-implementation of {EBNF::Parser} itself. 
## Parsing the Grammar @@ -8,11 +8,11 @@ This example implements an [EBNF][] parser equivalent to the built-in parser. Th ebnf = EBNFPegParser.new(File.open("../../etc/ebnf.ebnf")) -Output rules and terminals as S-Expressions, Turtle or EBNF +Output rules and terminals as [S-Expressions][S-Expression], [Turtle][] or [EBNF][] puts ebnf.to_sxp -This generates a S-Expression form of the grammar suitable for use by {EBNF}. +This generates a [S-Expression][] form of the grammar suitable for use by {EBNF}. ( (pass _pass (seq PASS)) @@ -53,11 +53,11 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF}. (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) -This can then be used as input to {EBNF.parse} to transform EBNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. +This can then be used as input to {EBNF.parse} to transform [EBNF][] to [PEG][] for parsing examples of the grammar using {EBNF::PEG::Parser}. ebnf --input-format sxp --peg ebnf.sxp -o ebnf.peg.sxp -An example S-Expression for rule `ebnf`, which is decomposed into sub-rules as follows: +An example [S-Expression][] for rule `ebnf`, which is decomposed into sub-rules as follows: (rule ebnf "1" (star _ebnf_1)) (rule _ebnf_1 "1.1" (alt declaration rule)) @@ -73,7 +73,7 @@ The first step is defining regular expressions for terminals used within the gra The {file:parser} is implemented using the {EBNFPegParser} class, which includes {EBNF::PEG::Parser}. ### Parser basics -The parser operates directly using the rules from the abstract syntax tree generated by turning the original EBNF grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. 
Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). +The parser operates directly using the rules from the abstract syntax tree generated by turning the original [EBNF][] grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). The parser starts with the specified rule, `ebnf` in this case, and executes that rule, which is expected to completely parse the input file potentially leaving some whitespace. @@ -146,7 +146,7 @@ Looking at the EBNF grammar itself, we can see that the first declaration is [1] ebnf ::= (declaration | rule)* -This is reduced to the LL(1) S-Expression noted above: +This is reduced to the [PEG][] [S-Expression][] noted above: (rule ebnf "1" (star _ebnf_1)) (rule _ebnf_1 "1.1" (alt declaration rule)) @@ -159,5 +159,8 @@ The `ebnf` production uses the `alt` operator. 
When matching the production itse [PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation [EBNF doc]: https://rubydoc.info/github/dryruby/ebnf/ -[Turtle gem]: https://rubygems.org/gems/rdf-turtle [Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/ +[PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar +[S-expression]: https://en.wikipedia.org/wiki/S-expression +[Turtle]: https://www.w3.org/TR/2012/WD-turtle-20120710/ +[Turtle gem]: https://rubygems.org/gems/rdf-turtle diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md index c0590ac..957fb6e 100644 --- a/examples/isoebnf/README.md +++ b/examples/isoebnf/README.md @@ -1,6 +1,6 @@ # ISO EBNF Parser example -This example implements an [ISO/IEC 14977][] parser which parses compatible grammars into S-Expressions. This allows the resulting S-Expressions to drive a PEG Parser to parser documents defined using [ISO/IEC 14977][]. +This example implements an [ISO/IEC 14977][] parser which parses compatible grammars into [S-Expressions][S-Expression]. This allows the resulting [S-Expressions][S-Expression] to drive a [PEG][]/[Packrat][] Parser to parse documents defined using [ISO/IEC 14977][]. ## Parsing the Grammar @@ -8,11 +8,11 @@ This example implements an [ISO/IEC 14977][] parser which parses compatible gram ebnf = ISOEBNFPegParser.new(File.open("examples/ebnf.isoebnf")) -Output rules and terminals as S-Expressions: +Output rules and terminals as [S-Expressions][S-Expression]: puts ebnf.to_sxp -This generates a S-Expression form of the grammar suitable for use by {EBNF}. +This generates a [S-Expression][] form of the grammar suitable for use by {EBNF}. ( (rule syntax (star syntax_rule)) @@ -66,26 +66,26 @@ This generates a S-Expression form of the grammar suitable for use by {EBNF}.
(rule start_repeat_symbol (alt "{" "(:")) (rule end_repeat_symbol (alt "}" ":)"))) -This can then be used as input to {EBNF.parse} to transform EBNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. +This can then be used as input to {EBNF.parse} to transform [EBNF][] to [PEG][] for parsing examples of the grammar using {EBNF::PEG::Parser}. ebnf --input-format sxp --peg ebnf.sxp -o ebnf.peg.sxp -Note, however, that ISO EBNF doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the {file:iso-ebnf.ebnf W3C EBNF definition of the grammar} does use terminal rules. +Note, however, that [ISO EBNF][ISO/IEC 14977] doesn't distinguish between terminal rules and non-terminal rules, so all rules are parsed as non-terminal rules with strings the only terminals. Whereas, the W3C [EBNF][] {file:iso-ebnf.ebnf definition of the grammar} does use terminal rules. When parsing files with this grammar, rules that are all capitalized _will_ be treated as terminal productions, although this is an proprietary interpretation of the specification. ## Example Walkthrough -This example uses the EBNF grammar from {file:iso-ebnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar. +This example uses the [EBNF][] grammar from {file:iso-ebnf.ebnf} to generate {file:meta}, which includes the resulting `RULES` table, used by {file:parser} to implement a parser for the grammar. The first step is defining regular expressions for terminals used within the grammar. Note that the parser can operate without terminal definitions, but this can greatly improve parser performance. The {file:parser} is implemented using the {ISOEBNFPegParser} class, which includes {EBNF::PEG::Parser}. 
### Parser basics -The parser operates directly using the rules from the abstract syntax tree generated by turning the original EBNF grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). +The parser operates directly using the rules from the abstract syntax tree generated by turning the original [ISO EBNF][ISO/IEC 14977] grammar using {EBNF::PEG#make_peg}. Tokens are derived from terminal rules defined in the grammar or contained inline through non-terminal rule definitions. Tokens are either strings, which must be matched exactly, or symbols, which identify a regular expression used to match the terminal and yield a token. The association between terminal symbols and their regular expressions along with processing rules to invoke when they are identified are described in [Terminal definitions](#Terminal_definitions). -The parser starts with the specified rule, `ebnf` in this case, and executes that rule, which is expected to completely parse the input file potentially leaving some whitespace. +The parser starts with the specified rule, `syntax` in this case, and executes that rule, which is expected to completely parse the input file potentially leaving some whitespace. 
Non-terminal rules have an expression using one of the following: @@ -130,6 +130,7 @@ Looking at the grammar itself, we can see that the first declaration is [PDD]: https://lists.w3.org/Archives/Public/public-rdf-ruby/2010May/0013.html [EBNF]: https://www.w3.org/TR/REC-xml/#sec-notation [EBNF doc]: https://rubydoc.info/github/dryruby/ebnf/ -[Turtle gem]: https://rubygems.org/gems/rdf-turtle [Packrat]: https://pdos.csail.mit.edu/~baford/packrat/thesis/ -[ISO/IEC 14977]:https://www.iso.org/standard/26153.html \ No newline at end of file +[PEG]: https://en.wikipedia.org/wiki/Parsing_expression_grammar +[ISO/IEC 14977]:https://www.iso.org/standard/26153.html +[S-expression]: https://en.wikipedia.org/wiki/S-expression diff --git a/lib/ebnf/peg/parser.rb b/lib/ebnf/peg/parser.rb index e9fd664..48b7ea7 100644 --- a/lib/ebnf/peg/parser.rb +++ b/lib/ebnf/peg/parser.rb @@ -98,7 +98,7 @@ def terminal(term, regexp = nil, **options, &block) # # @param [Symbol] term # The rule name - # @param [Hash{Symbol => Object}] + # @param [Hash{Symbol => Object}] options # Options which are returned from {Parser#onStart}. # @option options [Boolean] :as_hash (false) # If the production is a `seq`, causes the value to be represented as a single hash, rather than an array of individual hashes for each sub-production. Note that this is not always advisable due to the possibility of repeated productions within the sequence. diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index d860e34..76375b1 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -479,6 +479,23 @@ def terminals(ast, expr = @expr) end.flatten.compact.uniq end + # Return the symbols used in the rule. + # + # @param [Array] expr (@expr) + # The expression to check, defaults to the rule expression. + # Typically, if the expression is recursive, the embedded expression is called recursively. 
+ # @return [Array] + def symbols(expr = @expr) + expr[1..-1].map do |sym| + case sym + when Symbol + sym + when Array + symbols(sym) + end + end.flatten.compact.uniq + end + ## # The following are used for LL(1) transformation. ## From b6fd550ac8f2fa41cb9e091a222f5def7fd2e389 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 29 Jun 2020 15:31:10 -0700 Subject: [PATCH 12/50] Support `nocase` Rule operator for case-insensitive matching (used in ABNF). --- README.md | 10 ++++- examples/abnf/.byebug_history | 8 ++++ examples/abnf/README.md | 17 +++++-- examples/abnf/parser.rb | 11 +++-- lib/ebnf/peg/rule.rb | 2 +- lib/ebnf/rule.rb | 25 ++++++++--- spec/peg/rule_spec.rb | 18 ++++++++ spec/rule_spec.rb | 84 ++++++++++++++++++++++++++++++++--- 8 files changed, 153 insertions(+), 22 deletions(-) create mode 100644 examples/abnf/.byebug_history diff --git a/README.md b/README.md index e1f81d2..0fab1e5 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,6 @@ Different components of an EBNF rule expression are transformed into their own o "string""string" 'string'"string" A (B | C)(seq (A (alt B C))) - A~ extension(not A) A?(opt A) A B(seq A B) A | B(alt A B) @@ -167,11 +166,18 @@ Different components of an EBNF rule expression are transformed into their own o (seq (not B) A) for non-terminals (PEG parsing only)
A+(plus A) A*(star A) - A{n*m} extension(rept n m A) @pass " "*(pass _pass (star " ")) @terminals +Other rule operators are not directly supported in [EBNF][], but are included to support other notations (e.g., [ABNF][] and [ISO/IEC 14977][]): + + + + + +
%i"StRiNg"(nocase "StRiNg")Case-insensitive string matching
'' - A(not A)Negative look-ahead, used for non-terminal uses of `B - A`.
n*mA(rept n m A)Explicit repetition.
+ Additionally, rules defined with an UPPERCASE symbol are treated as terminals. For an [LL(1)][] parser generator, the {EBNF::BNF.make_bnf} method can be used to transform the EBNF rule into a BNF rule. diff --git a/examples/abnf/.byebug_history b/examples/abnf/.byebug_history new file mode 100644 index 0000000..7b79e57 --- /dev/null +++ b/examples/abnf/.byebug_history @@ -0,0 +1,8 @@ +exit +value +c +value +c +value +c +value diff --git a/examples/abnf/README.md b/examples/abnf/README.md index 1eec76c..0ed3387 100644 --- a/examples/abnf/README.md +++ b/examples/abnf/README.md @@ -83,19 +83,28 @@ The parser starts with the specified rule, `rulelist` in this case, and executes Non-terminal rules have an expression using one of the following: -`seq` -: A sequence of rules or terminals. If any (other than `opt` or `star`) to not parse, the rule is terminated as unmatched. -`opt` -: An optional rule or terminal. It either results in the matching rule or returns `nil`. `alt` : A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. +`opt` +: An optional rule or terminal. It either results in the matching rule or returns `nil`. `plus` : A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input. `rept m n` : A sequence of at lest `m` and at most `n` of the matching rule. It will always return an array. +`seq` +: A sequence of rules or terminals. If any (other than `opt` or `star`) to not parse, the rule is terminated as unmatched. `star` : A sequence of zero or more of the matching rule. It will always return an array. +Terminal rules may be expressed using any of the above operators, and additionally the following: + +`hex` +: A single character represented using the hexadecimal notation `#xnn`. 
+`nocase` +: A string which matches in a case-insensitive manner, so that `(nocase "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. +`range` +: A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation. + The starting rule is of the form `(rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))` which will attempt to parse the aliteration repeatedly until the end of input. If a rule matches, it enters a _production_, which may invoke a _start production before matching is attempted, and will call any _production_ either if matched, or unmatched. In the case of this parser, the _start production_ is used to declare the `as_hash` option on sequences, which causes the matched values to be represented using a Hash, rather than an array of hashes for each element of the sequence, which is the default behavior. The _production_ may choose to evaluate the returned abstract syntax tree to simplify the result, or create some semantic representation of that value. diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb index d1a83ea..a88f278 100644 --- a/examples/abnf/parser.rb +++ b/examples/abnf/parser.rb @@ -206,9 +206,14 @@ class ABNFParser [:opt, value[:alternation]] end - # `char_val ::= case_insensitive_string | case_sensitive_string` - production(:char_val) do |value| - # FIXME: need rule logic for insensitive matching of strings + # `case_insensitive_string ::= "%i"? 
quoted_string` + production(:case_insensitive_string) do |value| + require 'byebug'; byebug if value.first.has_key?(:case_sensitive_string) + [:nocase, value.last[:quoted_string]] + end + + # `case_sensitive_string ::= "%s" quoted_string` + production(:case_sensitive_string) do |value| value.last[:quoted_string] end diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index dcda348..90ba639 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -135,7 +135,7 @@ def parse(input) # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? plus.is_a?(Array) && terminal? ? plus.join("") : plus - when :range + when :range, :nocase # Matches the specified character range input.scan(to_regexp) || begin # Update furthest failure for strings and terminals diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index 76375b1..15b09f3 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -9,7 +9,7 @@ class Rule }.map(&:to_sym).freeze TERM_OPS = %w{ - hex range + hex nocase range }.map(&:to_sym).freeze # Symbol of rule @@ -63,6 +63,17 @@ class Rule # `nil` is allowed only for @pass # @param [Integer, nil] id # @param [Array] expr + # The expression is an internal-representation of an S-Expression with one of the following oparators: + # + # * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. + # * `hex` – A single character represented using the hexadecimal notation `#xnn`. + # * `nocase` – A string which matches in a case-insensitive manner, so that `(nocase "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. + # * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`. + # * `plus` – A sequence of one or more of the matching rule. 
If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input. + # * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation. + # * `rept m n` – A sequence of at lest `m` and at most `n` of the matching rule. It will always return an array. + # * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) to not parse, the rule is terminated as unmatched. + # * `star` – A sequence of zero or more of the matching rule. It will always return an array. # @param [:rule, :terminal, :pass, ] kind (nil) # @param [String] ebnf (nil) # When parsing, records the EBNF string used to create the rule. @@ -100,7 +111,7 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1 when :diff raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3 - when :hex, :not, :opt, :plus, :range, :star + when :hex, :nocase, :not, :opt, :plus, :range, :star raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2 when :rept raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4 @@ -153,11 +164,11 @@ def self.from_sxp(sxp) # @param [Hash{Symbol => Symbol}] cleanup (nil) # @param [Hash{Symbol => Object}] options def build(expr, kind: nil, cleanup: nil, **options) - new_sym, new_id = (@top_rule ||self).send(:make_sym_id) + new_sym, new_id = @top_rule.send(:make_sym_id) self.class.new(new_sym, new_id, expr, kind: kind, ebnf: @ebnf, - top_rule: (@top_rule || self), + top_rule: @top_rule, cleanup: cleanup, **options) end @@ -319,7 +330,7 @@ def to_peg this.expr = [:seq, new_rule.sym, expr[1]] new_rules << this 
new_rules << new_rule - elsif [:hex, :range].include?(expr.first) + elsif [:hex, :nocase, :range].include?(expr.first) # This rules are fine, they just need to be terminals raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal? new_rules << self @@ -338,6 +349,8 @@ def to_regexp case expr.first when :hex Regexp.new(translate_codepoints(expr[1])) + when :nocase + /#{expr.last}/ui when :range Regexp.new("[#{translate_codepoints(expr[1])}]") else @@ -622,6 +635,8 @@ def ttl_expr(expr, pfx, depth, is_obj = true) statements << %{#{indent}"g:#{op.to_s[1..-1]}"} when :"'" statements << %{#{indent}"#{esc(expr)}"} + when :nocase + statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}} when :range statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}} when :hex diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb index 1126b28..546449b 100644 --- a/spec/peg/rule_spec.rb +++ b/spec/peg/rule_spec.rb @@ -314,6 +314,24 @@ input: "B", expect: :unmatched }, + '(nocase "foo") with "foo"' => { + rule: [:nocase, "foo"], + + input: "foo", + expect: "foo" + }, + '(nocase "foo") with "FOO"' => { + rule: [:nocase, "foo"], + + input: "FOO", + expect: "FOO" + }, + '(nocase "fOo") with "FoO"' => { + rule: [:nocase, "fOo"], + + input: "FoO", + expect: "FoO" + }, "(range A-C) with 'A'" => { rule: [:range, "A-C"], input: "A", diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 6a1ae44..1b25d2e 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -6,8 +6,8 @@ describe EBNF::Rule do let(:debug) {[]} - let(:ebnf) {EBNF.parse("", debug: debug)} - subject {EBNF::Rule.new(:rule, "0", [:seq, :foo], ebnf: ebnf)} + let(:ebnf) {EBNF.parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__)))} + subject {EBNF::Rule.new(:rule, "0", [:seq, :foo])} describe ".from_sxp" do context "accepts valid variations" do @@ -36,6 +36,10 @@ %{(terminal R_CHAR "21" (diff CHAR "]"))}, 
EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal) ], + "nocase": [ + %{(terminal nc (nocase "foo"))}, + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal) + ], "not": [ %{(rule _a_1 "n.1" (not op1))}, EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule) @@ -78,6 +82,8 @@ "diff (empty)": %{(terminal R_CHAR "21" (diff))}, "diff (one)": %{(terminal R_CHAR "21" (diff CHAR))}, "diff (three)": %{(terminal R_CHAR "21" (diff CHAR "]" ","))}, + "nocase (empty)": %{(terminal nc (nocase))}, + "nocase (two)": %{(terminal nc (nocase "foo" "bar"))}, "not (empty)": %{(rule _a_1 "n.1" (not))}, "not (two)": %{(rule _a_1 "n.1" (not op1 op2))}, "opt (empty)": %{(rule _diff_1 "7.1" (opt))}, @@ -124,6 +130,10 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), %{(terminal R_CHAR "21" (diff CHAR "]"))}, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + %{(terminal nc (nocase "foo"))}, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), %{(rule _a_1 "n.1" (not op1))}, @@ -198,12 +208,18 @@ dc:identifier "21"; re:diff ( :CHAR "]" ) .}, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + %{ + :nc rdfs:label "nc"; + re:matches "foo" .}, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), %{ - :_a_1 rdfs:label "_a_1"; - dc:identifier "n.1"; - g:not :op1 .}, + :_a_1 rdfs:label "_a_1"; + dc:identifier "n.1"; + g:not :op1 .}, ], "opt": [ EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2], kind: :rule), @@ -482,6 +498,7 @@ describe "#to_regexp" do { hex: ["#x20", / /], + nocase: ["foo", /foo/ui], range: ["a-b", /[a-b]/], }.each do |title, (exp, regexp)| it title do @@ -516,6 +533,10 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), true, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + true, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), false, @@ -573,6 +594,10 @@ 
EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + false, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), false, @@ -630,6 +655,10 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + false, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), true, @@ -687,6 +716,10 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + false, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), false, @@ -744,6 +777,10 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], + "nocase": [ + EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + false, + ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), false, @@ -821,7 +858,7 @@ end describe "#non_terminals" do - subject {EBNF.parse(File.read File.expand_path("../../etc/ebnf.ebnf", __FILE__))} + subject {ebnf} { _pass: [], ebnf: [:declaration, :rule], @@ -854,7 +891,7 @@ end describe "#terminals" do - subject {EBNF.parse(File.read File.expand_path("../../etc/ebnf.ebnf", __FILE__))} + subject {ebnf} { _pass: [:PASS], ebnf: [], @@ -886,6 +923,39 @@ end end + describe "#symbols" do + subject {ebnf} + { + _pass: [:PASS], + ebnf: [:declaration, :rule], + declaration: [:pass], + alt: [:seq], + seq: [:diff], + diff: [:postfix], + postfix: [:primary, :POSTFIX], + primary: [:HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :expression], + pass: [:expression], + LHS: [:SYMBOL], + SYMBOL: [], + HEX: [], + ENUM: [:R_CHAR, :HEX, :LHS], + O_ENUM: [:R_CHAR, :HEX], + RANGE: [:R_CHAR, :HEX], + O_RANGE: [:R_CHAR, :HEX], + STRING1: [:CHAR], + STRING2: [:CHAR], + CHAR: [], + R_CHAR: [:CHAR], + POSTFIX: [], 
+ PASS: [] + }.each do |sym, expected| + it "#{sym} => #{expected.inspect}" do + res = subject.ast.find {|r| r.sym == sym} + expect(res.symbols).to eq expected + end + end + end + describe "#validate!" do subject {EBNF.parse("a ::= b")} it "notes missing rule" do From d5504ffda06d3e3f01ba3b2b3b99c125b271ed72 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 30 Jun 2020 09:55:27 -0700 Subject: [PATCH 13/50] Rename `nocase` to `istr`. --- README.md | 2 +- examples/abnf/README.md | 4 ++-- examples/abnf/parser.rb | 2 +- lib/ebnf/peg/rule.rb | 2 +- lib/ebnf/rule.rb | 12 ++++++------ spec/peg/rule_spec.rb | 12 ++++++------ spec/rule_spec.rb | 42 ++++++++++++++++++++--------------------- 7 files changed, 38 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 0fab1e5..0bb17ce 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ Different components of an EBNF rule expression are transformed into their own o Other rule operators are not directly supported in [EBNF][], but are included to support other notations (e.g., [ABNF][] and [ISO/IEC 14977][]): - +
%i"StRiNg"(nocase "StRiNg")Case-insensitive string matching
%i"StRiNg"(istr "StRiNg")Case-insensitive string matching
'' - A(not A)Negative look-ahead, used for non-terminal uses of `B - A`.
n*mA(rept n m A)Explicit repetition.
diff --git a/examples/abnf/README.md b/examples/abnf/README.md index 0ed3387..f6d7a43 100644 --- a/examples/abnf/README.md +++ b/examples/abnf/README.md @@ -100,8 +100,8 @@ Terminal rules may be expressed using any of the above operators, and additional `hex` : A single character represented using the hexadecimal notation `#xnn`. -`nocase` -: A string which matches in a case-insensitive manner, so that `(nocase "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. +`istr` +: A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. `range` : A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation. diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb index a88f278..3f9400c 100644 --- a/examples/abnf/parser.rb +++ b/examples/abnf/parser.rb @@ -209,7 +209,7 @@ class ABNFParser # `case_insensitive_string ::= "%i"? quoted_string` production(:case_insensitive_string) do |value| require 'byebug'; byebug if value.first.has_key?(:case_sensitive_string) - [:nocase, value.last[:quoted_string]] + [:istr, value.last[:quoted_string]] end # `case_sensitive_string ::= "%s" quoted_string` diff --git a/lib/ebnf/peg/rule.rb b/lib/ebnf/peg/rule.rb index 90ba639..0934bb8 100644 --- a/lib/ebnf/peg/rule.rb +++ b/lib/ebnf/peg/rule.rb @@ -135,7 +135,7 @@ def parse(input) # Update furthest failure for strings and terminals parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal? plus.is_a?(Array) && terminal? ? 
plus.join("") : plus - when :range, :nocase + when :range, :istr # Matches the specified character range input.scan(to_regexp) || begin # Update furthest failure for strings and terminals diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index 15b09f3..cb73d1b 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -9,7 +9,7 @@ class Rule }.map(&:to_sym).freeze TERM_OPS = %w{ - hex nocase range + hex istr range }.map(&:to_sym).freeze # Symbol of rule @@ -67,7 +67,7 @@ class Rule # # * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. # * `hex` – A single character represented using the hexadecimal notation `#xnn`. - # * `nocase` – A string which matches in a case-insensitive manner, so that `(nocase "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. + # * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. # * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`. # * `plus` – A sequence of one or more of the matching rule. If there is no such rule, it is terminated as unmatched; otherwise, the result is an array containing all matched input. # * `range` – A range of characters, possibly repeated, of the form `(range "a-z")`. May also use hexadecimal notation. 
@@ -111,7 +111,7 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta raise ArgumentError, "#{@expr.first} operation must have at least one operand, had #{@expr.length - 1}" unless @expr.length > 1 when :diff raise ArgumentError, "#{@expr.first} operation must have exactly two operands, had #{@expr.length - 1}" unless @expr.length == 3 - when :hex, :nocase, :not, :opt, :plus, :range, :star + when :hex, :istr, :not, :opt, :plus, :range, :star raise ArgumentError, "#{@expr.first} operation must have exactly one operand, had #{@expr.length - 1}" unless @expr.length == 2 when :rept raise ArgumentError, "#{@expr.first} operation must have exactly three, had #{@expr.length - 1}" unless @expr.length == 4 @@ -330,7 +330,7 @@ def to_peg this.expr = [:seq, new_rule.sym, expr[1]] new_rules << this new_rules << new_rule - elsif [:hex, :nocase, :range].include?(expr.first) + elsif [:hex, :istr, :range].include?(expr.first) # This rules are fine, they just need to be terminals raise "Encountered #{expr.first.inspect}, which is a #{self.kind}, not :terminal" unless self.terminal? 
new_rules << self @@ -349,7 +349,7 @@ def to_regexp case expr.first when :hex Regexp.new(translate_codepoints(expr[1])) - when :nocase + when :istr /#{expr.last}/ui when :range Regexp.new("[#{translate_codepoints(expr[1])}]") @@ -635,7 +635,7 @@ def ttl_expr(expr, pfx, depth, is_obj = true) statements << %{#{indent}"g:#{op.to_s[1..-1]}"} when :"'" statements << %{#{indent}"#{esc(expr)}"} - when :nocase + when :istr statements << %{#{indent}#{bra} re:matches #{expr.first.inspect} #{ket}} when :range statements << %{#{indent}#{bra} re:matches #{cclass(expr.first).inspect} #{ket}} diff --git a/spec/peg/rule_spec.rb b/spec/peg/rule_spec.rb index 546449b..e3838c1 100644 --- a/spec/peg/rule_spec.rb +++ b/spec/peg/rule_spec.rb @@ -314,20 +314,20 @@ input: "B", expect: :unmatched }, - '(nocase "foo") with "foo"' => { - rule: [:nocase, "foo"], + '(istr "foo") with "foo"' => { + rule: [:istr, "foo"], input: "foo", expect: "foo" }, - '(nocase "foo") with "FOO"' => { - rule: [:nocase, "foo"], + '(istr "foo") with "FOO"' => { + rule: [:istr, "foo"], input: "FOO", expect: "FOO" }, - '(nocase "fOo") with "FoO"' => { - rule: [:nocase, "fOo"], + '(istr "fOo") with "FoO"' => { + rule: [:istr, "fOo"], input: "FoO", expect: "FoO" diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 1b25d2e..f17bd1a 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -36,9 +36,9 @@ %{(terminal R_CHAR "21" (diff CHAR "]"))}, EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal) ], - "nocase": [ - %{(terminal nc (nocase "foo"))}, - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal) + "istr": [ + %{(terminal nc (istr "foo"))}, + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal) ], "not": [ %{(rule _a_1 "n.1" (not op1))}, @@ -82,8 +82,8 @@ "diff (empty)": %{(terminal R_CHAR "21" (diff))}, "diff (one)": %{(terminal R_CHAR "21" (diff CHAR))}, "diff (three)": %{(terminal R_CHAR "21" (diff CHAR "]" ","))}, - "nocase (empty)": %{(terminal nc (nocase))}, - "nocase (two)": 
%{(terminal nc (nocase "foo" "bar"))}, + "istr (empty)": %{(terminal nc (istr))}, + "istr (two)": %{(terminal nc (istr "foo" "bar"))}, "not (empty)": %{(rule _a_1 "n.1" (not))}, "not (two)": %{(rule _a_1 "n.1" (not op1 op2))}, "opt (empty)": %{(rule _diff_1 "7.1" (opt))}, @@ -130,9 +130,9 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), %{(terminal R_CHAR "21" (diff CHAR "]"))}, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), - %{(terminal nc (nocase "foo"))}, + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), + %{(terminal nc (istr "foo"))}, ], "not": [ EBNF::Rule.new(:_a_1, "n.1", [:not, :op1], kind: :rule), @@ -208,8 +208,8 @@ dc:identifier "21"; re:diff ( :CHAR "]" ) .}, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), %{ :nc rdfs:label "nc"; re:matches "foo" .}, @@ -498,7 +498,7 @@ describe "#to_regexp" do { hex: ["#x20", / /], - nocase: ["foo", /foo/ui], + istr: ["foo", /foo/ui], range: ["a-b", /[a-b]/], }.each do |title, (exp, regexp)| it title do @@ -533,8 +533,8 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), true, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), true, ], "not": [ @@ -594,8 +594,8 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), false, ], "not": [ @@ -655,8 +655,8 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), false, ], "not": [ @@ -716,8 +716,8 @@ EBNF::Rule.new(:R_CHAR, 
"21", [:diff, :CHAR, "]"], kind: :terminal), false, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), false, ], "not": [ @@ -777,8 +777,8 @@ EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal), false, ], - "nocase": [ - EBNF::Rule.new(:nc, nil, [:nocase, "foo"], kind: :terminal), + "istr": [ + EBNF::Rule.new(:nc, nil, [:istr, "foo"], kind: :terminal), false, ], "not": [ From f3a56dac0edb4b930d778d6e8ab0e5f3bc438e23 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 30 Jun 2020 14:08:57 -0700 Subject: [PATCH 14/50] Only use istr operator if a case-insensitive string contains alphabetic characters. --- examples/abnf/parser.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb index 3f9400c..d095791 100644 --- a/examples/abnf/parser.rb +++ b/examples/abnf/parser.rb @@ -208,8 +208,13 @@ class ABNFParser # `case_insensitive_string ::= "%i"? quoted_string` production(:case_insensitive_string) do |value| - require 'byebug'; byebug if value.first.has_key?(:case_sensitive_string) - [:istr, value.last[:quoted_string]] + str = value.last[:quoted_string] + if str.match?(/[[:alpha:]]/) + # Only need to use case-insensitive if there are alphabetic characters in the string. + [:istr, value.last[:quoted_string]] + else + value.last[:quoted_string] + end end # `case_sensitive_string ::= "%s" quoted_string` From b6fa6353b0188394a3d819c7379c71147f56f85d Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 30 Jun 2020 14:46:59 -0700 Subject: [PATCH 15/50] Make abnf character hex ranges (`%x0e`) match case insensitively. Add more ABNF examples. 
--- examples/abnf/.byebug_history | 8 --- examples/abnf/abnf.ebnf | 2 +- examples/abnf/examples/http.abnf | 104 +++++++++++++++++++++++++++++++ examples/abnf/examples/json.abnf | 72 +++++++++++++++++++++ examples/abnf/examples/strs.abnf | 3 + examples/abnf/examples/uri.abnf | 71 +++++++++++++++++++++ examples/abnf/parser.rb | 3 +- 7 files changed, 253 insertions(+), 10 deletions(-) delete mode 100644 examples/abnf/.byebug_history create mode 100644 examples/abnf/examples/http.abnf create mode 100644 examples/abnf/examples/json.abnf create mode 100644 examples/abnf/examples/strs.abnf create mode 100644 examples/abnf/examples/uri.abnf diff --git a/examples/abnf/.byebug_history b/examples/abnf/.byebug_history deleted file mode 100644 index 7b79e57..0000000 --- a/examples/abnf/.byebug_history +++ /dev/null @@ -1,8 +0,0 @@ -exit -value -c -value -c -value -c -value diff --git a/examples/abnf/abnf.ebnf b/examples/abnf/abnf.ebnf index 2f5ef59..6e8d708 100644 --- a/examples/abnf/abnf.ebnf +++ b/examples/abnf/abnf.ebnf @@ -93,7 +93,7 @@ DIGIT ::= [#x30-#x39] DQUOTE ::= #x22 # " (Double Quote) -HEXDIG ::= DIGIT | [A-F] +HEXDIG ::= DIGIT | "A" | "B" | "C" | "D" | "E" | "F" HTAB ::= #x09 # horizontal tab diff --git a/examples/abnf/examples/http.abnf b/examples/abnf/examples/http.abnf new file mode 100644 index 0000000..1cc15de --- /dev/null +++ b/examples/abnf/examples/http.abnf @@ -0,0 +1,104 @@ +BWS = OWS + +Connection = *( "," OWS ) connection-option *( OWS "," [ OWS connection-option ] ) + +Content-Length = 1*DIGIT + +HTTP-message = start-line *( header-field CRLF ) CRLF [ message-body] +HTTP-name = %x48.54.54.50 ; HTTP +HTTP-version = HTTP-name "/" DIGIT "." 
DIGIT +Host = uri-host [ ":" port ] + +OWS = *( SP / HTAB ) + +RWS = 1*( SP / HTAB ) + +TE = [ ( "," / t-codings ) *( OWS "," [ OWS t-codings ] ) ] +Trailer = *( "," OWS ) field-name *( OWS "," [ OWS field-name ] ) +Transfer-Encoding = *( "," OWS ) transfer-coding *( OWS "," [ OWS transfer-coding ] ) + +URI-reference = +Upgrade = *( "," OWS ) protocol *( OWS "," [ OWS protocol ] ) + +Via = *( "," OWS ) + ( received-protocol RWS received-by [ RWS comment ] ) + *( OWS "," [ OWS ( received-protocol RWS received-by [ RWS comment ] ) ] ) + +absolute-URI = +absolute-form = absolute-URI +absolute-path = 1*( "/" segment ) +asterisk-form = "*" +authority = +authority-form = authority +chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF +chunk-data = 1*OCTET +chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +chunk-ext-name = token +chunk-ext-val = token / quoted-string +chunk-size = 1*HEXDIG +chunked-body = *chunk last-chunk trailer-part CRLF +comment = "(" *( ctext / quoted-pair / comment ) ")" +connection-option = token +ctext = HTAB / SP / %x21-27 ; '!'-''' + / %x2A-5B ; '*'-'[' + / %x5D-7E ; ']'-'~' + / obs-text + +field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +field-name = token +field-value = *( field-content / obs-fold ) +field-vchar = VCHAR / obs-text +fragment = + +header-field = field-name ":" OWS field-value OWS +http-URI = "http://" authority path-abempty [ "?" query ] [ "#" fragment ] +https-URI = "https://" authority path-abempty [ "?" query ] [ "#" fragment ] + +last-chunk = 1*"0" [ chunk-ext ] CRLF + +message-body = *OCTET +method = token + +obs-fold = CRLF 1*( SP / HTAB ) +obs-text = %x80-FF +origin-form = absolute-path [ "?" query ] + +partial-URI = relative-part [ "?" query ] +path-abempty = +port = +protocol = protocol-name [ "/" protocol-version ] +protocol-name = token +protocol-version = token +pseudonym = token + +qdtext = HTAB / SP / "!" 
/ %x23-5B ; '#'-'[' + / %x5D-7E ; ']'-'~' + / obs-text +query = +quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) +quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + +rank = ( "0" [ "." *3DIGIT ] ) / ( "1" [ "." *3"0" ] ) +reason-phrase = *( HTAB / SP / VCHAR / obs-text ) +received-by = ( uri-host [ ":" port ] ) / pseudonym +received-protocol = [ protocol-name "/" ] protocol-version +relative-part = +request-line = method SP request-target SP HTTP-version CRLF +request-target = origin-form / absolute-form / authority-form / asterisk-form + +scheme = +segment = +start-line = request-line / status-line +status-code = 3DIGIT +status-line = HTTP-version SP status-code SP reason-phrase CRLF + +t-codings = "trailers" / ( transfer-coding [ t-ranking ] ) +t-ranking = OWS ";" OWS "q=" rank +tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA +token = 1*tchar +trailer-part = *( header-field CRLF ) +transfer-coding = "chunked" / "compress" / "deflate" / "gzip" / transfer-extension +transfer-extension = token *( OWS ";" OWS transfer-parameter ) +transfer-parameter = token BWS "=" BWS ( token / quoted-string ) + +uri-host = diff --git a/examples/abnf/examples/json.abnf b/examples/abnf/examples/json.abnf new file mode 100644 index 0000000..29039a0 --- /dev/null +++ b/examples/abnf/examples/json.abnf @@ -0,0 +1,72 @@ +JSON-text = ws value ws + +begin-array = ws %x5B ws ; [ left square bracket + +begin-object = ws %x7B ws ; { left curly bracket + +end-array = ws %x5D ws ; ] right square bracket + +end-object = ws %x7D ws ; } right curly bracket + +name-separator = ws %x3A ws ; : colon + +value-separator = ws %x2C ws ; , comma + +ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ) ; Carriage return + +value = false / null / true / object / array / number / string + +false = %x66.61.6c.73.65 ; false + +null = %x6e.75.6c.6c ; null + +true = %x74.72.75.65 ; true + +object = 
begin-object [ member *( value-separator member ) ] + end-object + +member = string name-separator value + +array = begin-array [ value *( value-separator value ) ] end-array + +number = [ minus ] int [ frac ] [ exp ] + +decimal-point = %x2E ; . + +digit1-9 = %x31-39 ; 1-9 + +e = %x65 / %x45 ; e E + +exp = e [ minus / plus ] 1*DIGIT + +frac = decimal-point 1*DIGIT +int = zero / ( digit1-9 *DIGIT ) + +minus = %x2D ; - + +plus = %x2B ; + + +zero = %x30 ; 0 +string = quotation-mark *char quotation-mark + +char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + +escape = %x5C ; \ + +quotation-mark = %x22 ; " + +unescaped = %x20-21 / %x23-5B / %x5D-10FFFF diff --git a/examples/abnf/examples/strs.abnf b/examples/abnf/examples/strs.abnf new file mode 100644 index 0000000..c45d1ce --- /dev/null +++ b/examples/abnf/examples/strs.abnf @@ -0,0 +1,3 @@ +x = *y %d1.3.10 +y = %s"Ab" / (2z / %x30-39) +z = x ["ab"] diff --git a/examples/abnf/examples/uri.abnf b/examples/abnf/examples/uri.abnf new file mode 100644 index 0000000..40e1a95 --- /dev/null +++ b/examples/abnf/examples/uri.abnf @@ -0,0 +1,71 @@ +URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + +hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + +URI-reference = URI / relative-ref + +absolute-URI = scheme ":" hier-part [ "?" query ] + +relative-ref = relative-part [ "?" query ] [ "#" fragment ] + +relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + +scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) + +authority = [ userinfo "@" ] host [ ":" port ] +userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +host = IP-literal / IPv4address / reg-name +port = *DIGIT + +IP-literal = "[" ( IPv6address / IPvFuture ) "]" + +IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + +IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + +h16 = 1*4HEXDIG +ls32 = ( h16 ":" h16 ) / IPv4address +IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 +reg-name = *( unreserved / pct-encoded / sub-delims ) +path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters +path-abempty = *( "/" segment ) +path-absolute = "/" [ segment-nz *( "/" segment ) ] +path-noscheme = segment-nz-nc *( "/" segment ) +path-rootless = segment-nz *( "/" segment ) +path-empty = 0 +segment = *pchar +segment-nz = 1*pchar +segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" +pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +query = *( pchar / "/" / "?" ) +fragment = *( pchar / "/" / "?" ) +pct-encoded = "%" HEXDIG HEXDIG +unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +reserved = gen-delims / sub-delims +gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb index d095791..ecccd4d 100644 --- a/examples/abnf/parser.rb +++ b/examples/abnf/parser.rb @@ -72,7 +72,7 @@ class ABNFParser end # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?` - terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value| + terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value| if value.include?('.') # Interpret segments in hexadecimal creating a string value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("") @@ -122,6 +122,7 @@ class ABNFParser raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym) parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements) end + progress(:rule, level: 2) {parsed_rules[sym].to_sxp} sym end From 3de4eb6362e64e68671fe006535bf512db9b0fa1 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 1 Jul 2020 13:39:26 -0700 Subject: [PATCH 16/50] Add native ABNF support usable from the command line, and by using `format: :abnf` to `EBNF.parse`. No support for writing ABNF yet, but parsed ABNF can be written as EBNF, with some loss of fidelity.
--- .travis.yml | 1 + bin/ebnf | 6 +- ebnf.gemspec | 1 + lib/ebnf.rb | 1 + lib/ebnf/abnf.rb | 269 ++++++++++++++++++++++++++++++++++++++++++ lib/ebnf/abnf/core.rb | 23 ++++ lib/ebnf/abnf/meta.rb | 111 +++++++++++++++++ lib/ebnf/base.rb | 17 ++- lib/ebnf/writer.rb | 123 ++++++++++++++----- spec/writer_spec.rb | 147 +++++++++++++++++++++++ 10 files changed, 663 insertions(+), 36 deletions(-) create mode 100644 lib/ebnf/abnf.rb create mode 100644 lib/ebnf/abnf/core.rb create mode 100644 lib/ebnf/abnf/meta.rb diff --git a/.travis.yml b/.travis.yml index 83a126b..7e1193e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,7 @@ language: ruby script: "bundle exec rspec spec" env: + - NOKOGIRI_USE_SYSTEM_LIBRARIES=true - CI=true rvm: - 2.4 diff --git a/bin/ebnf b/bin/ebnf index d3a1720..2067dde 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -24,8 +24,8 @@ OPT_ARGS = [ ["--bnf", GetoptLong::NO_ARGUMENT, "Transform EBNF to BNF"], ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"], ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], - ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of ebnf, html, ttl, sxp, or rb"], - ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of ebnf or sxp"], + ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, ttl, sxp, or rb"], + ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf or sxp"], ["--mod-name", GetoptLong::REQUIRED_ARGUMENT,"Module name used when creating ruby tables"], ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT,"Output to the specified file path"], ["--peg", GetoptLong::NO_ARGUMENT, "Transform EBNF to PEG"], @@ -81,6 +81,8 @@ if options[:ll1] end res = case options[:output_format] +when :abnf then ebnf.to_s(:abnf) +when :abnfh then ebnf.to_html(:abnf) when :ebnf then ebnf.to_s when :html then ebnf.to_html when 
:sxp then ebnf.to_sxp diff --git a/ebnf.gemspec b/ebnf.gemspec index 197c9e4..4d07e55 100755 --- a/ebnf.gemspec +++ b/ebnf.gemspec @@ -30,6 +30,7 @@ Gem::Specification.new do |gem| gem.add_development_dependency 'rdf-spec', '~> 3.1' gem.add_development_dependency 'rdf-turtle', '~> 3.1' gem.add_development_dependency 'haml', '~> 5.0' + gem.add_development_dependency 'nokogiri', '~> 1.10' gem.add_development_dependency 'rspec', '~> 3.9' gem.add_development_dependency 'rspec-its', '~> 1.3' gem.add_development_dependency 'yard', '~> 0.9' diff --git a/lib/ebnf.rb b/lib/ebnf.rb index 75bb5bc..7c32687 100755 --- a/lib/ebnf.rb +++ b/lib/ebnf.rb @@ -1,4 +1,5 @@ module EBNF + autoload :ABNF, "ebnf/abnf" autoload :Base, "ebnf/base" autoload :BNF, "ebnf/bnf" autoload :LL1, "ebnf/ll1" diff --git a/lib/ebnf/abnf.rb b/lib/ebnf/abnf.rb new file mode 100644 index 0000000..2eaa8b7 --- /dev/null +++ b/lib/ebnf/abnf.rb @@ -0,0 +1,269 @@ +require_relative 'abnf/core' +require_relative 'abnf/meta' + +# ABNF parser +# Parses ABNF into an array of {EBNF::Rule}. +module EBNF + class ABNF + include EBNF::PEG::Parser + + # Regular expressions for both "Core" and ABNF-specific terminals. + ALPHA = %r{[\x41-\x5A\x61-\x7A]} + VCHAR = %r{[\x20-\x7E]} + WSP = %r{[\x20\x09]} + CRLF = %r{\x0D?\x0A} + COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}} + C_NL = %r{#{COMMENT}|#{CRLF}} + C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})} + + ## + # Hash of generated {EBNF::Rule} objects by symbol + # + # @return [Hash{Symbol => EBNF::Rule}] + attr_reader :parsed_rules + + ## + # The following ABNF grammar rules are treated as terminals. 
+ + # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*` + terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value| + value.to_sym + end + + # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*` + terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip} + + # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE` + terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value| + value[1..-2] + end + + # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?` + terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value| + if value.include?('.') + # Interpret segments in binary creating a string + value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x%x" % value[1..-1].to_i(base=2)] + end + end + + # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?` + terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value| + if value.include?('.') + # Interpret segments in decimal creating a string + value[1..-1].split('.').map {|d| d.to_i.chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x%x" % value[1..-1].to_i] + end + end + + # `hex_val ::= "x" HEXDIG+ (("." 
HEXDIG+)+ | ("-" HEXDIG+))?` + terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value| + if value.include?('.') + # Interpret segments in hexadecimal creating a string + value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("") + elsif value.include?('-') + # Interpret as a range + [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")] + else + # Interpret as a single HEX character + [:hex, "#x#{value[1..-1]}"] + end + end + + # `c_wsp ::= WSP | (c_nl WSP)` + terminal(:c_wsp, C_WSP) + + # `c_nl ::= comment | CRLF` + terminal(:c_nl, C_NL) + + # `DIGIT ::= [#x30-#x39]` + terminal(:DIGIT, /\d/) + + # ## Non-terminal productions + + # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes. + start_production(:rule, as_hash: true) + + # `rule ::= rulename defined_as elements c_nl` + production(:rule) do |value| + # value contains an expression. + # Invoke callback + sym = value[:rulename] + elements = value[:elements] + + if value[:defined_as] == "=/" + # append to rule alternate + rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"} + rule.expr = [:alt, rule.expr] unless rule.alt? + if elements.first == :alt + # append alternatives to rule + rule.expr.concat(elements[1..-1]) + else + # add elements as last alternative + rule.expr.push(elements) + end + else + # There shouldn't be an existing rule + raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym) + parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements) + end + progress(:rule, level: 2) {parsed_rules[sym].to_sxp} + sym + end + + # `elements ::= alternation c_wsp*` + production(:elements) do |value| + value.first[:alternation] + end + + # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*` + production(:alternation) do |value| + unless value.last[:_alternation_1].empty? 
+ [:alt, value.first[:concatenation]] + value.last[:_alternation_1] + else + value.first[:concatenation] + end + end + + # The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule. + production(:_alternation_2) do |value| + if Array(value.last[:concatenation]).first == :alt + value.last[:concatenation][1..-1] + else + [value.last[:concatenation]] + end + value.last[:concatenation] + end + + # `concatenation::= repetition (c_wsp+ repetition)*` + production(:concatenation) do |value| + unless value.last[:_concatenation_1].empty? + [:seq, value.first[:repetition]] + value.last[:_concatenation_1] + else + value.first[:repetition] + end + end + start_production(:_concatenation_2, as_hash: true) + production(:_concatenation_2) do |value| + value[:repetition] + end + + # `repetition ::= repeat? element` + production(:repetition) do |value| + rept = value.first[:_repetition_1] + elt = value.last[:element] + case rept + when [0, '*'] then [:star, elt] + when [1, '*'] then [:plus, elt] + when nil then elt + else + [:rept, rept.first, rept.last, elt] + end + end + + # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)` + production(:repeat) do |value| + if value.is_a?(Integer) + [value, value] + else + [value.first, value.last] + end + end + start_production(:_repeat_1, as_hash: true) + production(:_repeat_1) {|value| value.values} + production(:_repeat_2) {|value| value.join("").to_i} + production(:_repeat_3) {|value| value.join("").to_i} + production(:_repeat_4) {|value| value.length > 0 ? 
value.join("").to_i : '*'} + + # `element ::= rulename | group | option | char_val | num_val | prose_val` + production(:element) do |value| + value + end + + # `group ::= "(" c_wsp* alternation c_wsp* ")"` + start_production(:group, as_hash: true) + production(:group) do |value| + value[:alternation] + end + + # `option ::= "[" c_wsp* alternation c_wsp* "]"` + start_production(:option, as_hash: true) + production(:option) do |value| + [:opt, value[:alternation]] + end + + # `case_insensitive_string ::= "%i"? quoted_string` + production(:case_insensitive_string) do |value| + str = value.last[:quoted_string] + if str.match?(/[[:alpha:]]/) + # Only need to use case-insensitive if there are alphabetic characters in the string. + [:istr, value.last[:quoted_string]] + else + value.last[:quoted_string] + end + end + + # `case_sensitive_string ::= "%s" quoted_string` + production(:case_sensitive_string) do |value| + value.last[:quoted_string] + end + + # `num_val ::= "%" (bin_val | dec_val | hex_val)` + production(:num_val) do |value| + value.last[:_num_val_1] + end + + # ## Parser invocation. + # On start, yield ourselves if a block is given, otherwise, return this parser instance + # + # @param [#read, #to_s] input + # @param [Hash{Symbol => Object}] options + # @option options [Boolean] :level + # Trace level. 0(debug), 1(info), 2(warn), 3(error). + # @return [EBNFParser] + def initialize(input, **options) + # If the `level` option is set, instantiate a logger for collecting trace information. + if options.has_key?(:level) + options[:logger] = Logger.new(STDERR) + options[:logger].level = options[:level] + options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"} + end + + # Read input, if necessary, which will be used in a Scanner. + @input = input.respond_to?(:read) ? 
input.read : input.to_s + + @parsed_rules = {} + + # Parses into `@parsed_rules` + parse(@input, + :rulelist, # Starting rule + ABNFMeta::RULES, # PEG rules + whitespace: '', # No implicit whitespace + **options) + end + + ## + # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar. + # + # @return [Array] + def ast + # Add built-in rules for standard ABNF rules not + parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym| + rule = ABNFCore::RULES.detect {|r| r.sym == sym} + parsed_rules[sym] ||= rule + end + + parsed_rules.values + end + end +end \ No newline at end of file diff --git a/lib/ebnf/abnf/core.rb b/lib/ebnf/abnf/core.rb new file mode 100644 index 0000000..d4e73d0 --- /dev/null +++ b/lib/ebnf/abnf/core.rb @@ -0,0 +1,23 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from abnf-core.ebnf +module ABNFCore + RULES = [ + EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), + EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal), + EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal), + EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal), + EBNF::Rule.new(:CRLF, nil, [:seq, [:opt, :CR], :LF], kind: :terminal), + EBNF::Rule.new(:CTL, nil, [:alt, [:range, "#x00-#x1F"], [:hex, "#x7F"]], kind: :terminal), + EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal), + EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, [:range, "A-F"]], kind: :terminal), + EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal), + EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal), + EBNF::Rule.new(:LWSP, nil, [:star, [:alt, :WSP, [:seq, :CRLF, :WSP]]], kind: :terminal), + EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal), + EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal), + EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal), + 
EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal), + ] +end + diff --git a/lib/ebnf/abnf/meta.rb b/lib/ebnf/abnf/meta.rb new file mode 100644 index 0000000..b5ce638 --- /dev/null +++ b/lib/ebnf/abnf/meta.rb @@ -0,0 +1,111 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from abnf.ebnf +module ABNFMeta + RULES = [ + EBNF::Rule.new(:rulelist, nil, [:plus, :_rulelist_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_1, nil, [:alt, :rule, :_rulelist_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_2, nil, [:seq, :_rulelist_3, :c_nl]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulelist_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:rule, nil, [:seq, :rulename, :defined_as, :elements, :c_nl]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:elements, nil, [:seq, :alternation, :_elements_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_elements_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:alternation, nil, [:seq, :concatenation, :_alternation_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_1, nil, [:star, :_alternation_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_2, nil, [:seq, :_alternation_3, "/", :_alternation_4, :concatenation]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alternation_4, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenation, nil, [:seq, :repetition, :_concatenation_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_1, nil, [:star, :_concatenation_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_2, nil, [:seq, :_concatenation_3, :repetition]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_concatenation_3, nil, [:plus, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repetition, nil, [:seq, :_repetition_1, :element]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repetition_1, nil, [:opt, 
:repeat]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repeat, nil, [:alt, :_repeat_1, :_repeat_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_1, nil, [:seq, :_repeat_3, "*", :_repeat_4]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_3, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_4, nil, [:star, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_repeat_2, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:element, nil, [:alt, :rulename, :group, :option, :char_val, :num_val, :prose_val]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:group, nil, [:seq, "(", :_group_1, :alternation, :_group_2, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_group_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_group_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:option, nil, [:seq, "[", :_option_1, :alternation, :_option_2, "]"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_option_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_option_2, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:char_val, nil, [:alt, :case_insensitive_string, :case_sensitive_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:case_insensitive_string, nil, [:seq, :_case_insensitive_string_1, :quoted_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_case_insensitive_string_1, nil, [:opt, "%i"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:case_sensitive_string, nil, [:seq, "%s", :quoted_string]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:num_val, nil, [:seq, "%", :_num_val_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_num_val_1, nil, [:alt, :bin_val, :dec_val, :hex_val]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:rulename, nil, [:seq, :ALPHA, :_rulename_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulename_1, nil, [:star, :_rulename_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_rulename_2, nil, [:alt, :ALPHA, :DIGIT, "-"]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:defined_as, nil, [:seq, :_defined_as_1, :_defined_as_2, :_defined_as_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_1, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_2, nil, [:alt, "=", "=/"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_defined_as_3, nil, [:star, :c_wsp]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:c_wsp, nil, [:alt, :WSP, :_c_wsp_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_c_wsp_1, nil, [:seq, :c_nl, :WSP]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:c_nl, nil, [:alt, :COMMENT, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment, nil, [:seq, ";", :_comment_1, :CRLF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_1, nil, [:star, :_comment_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_2, nil, [:alt, :WSP, :VCHAR]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:quoted_string, nil, [:seq, :DQUOTE, :_quoted_string_1, :DQUOTE], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_1, nil, [:star, :_quoted_string_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_2, nil, [:range, "#x20-#x21#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:bin_val, nil, [:seq, "b", :_bin_val_1, :_bin_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_1, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_2, nil, [:opt, :_bin_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_3, nil, [:alt, :_bin_val_4, :_bin_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_4, nil, [:plus, :_bin_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_6, nil, [:seq, ".", :_bin_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_7, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_5, nil, [:seq, "-", :_bin_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_bin_val_8, nil, [:plus, 
:BIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:dec_val, nil, [:seq, "d", :_dec_val_1, :_dec_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_1, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_2, nil, [:opt, :_dec_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_3, nil, [:alt, :_dec_val_4, :_dec_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_4, nil, [:plus, :_dec_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_6, nil, [:seq, ".", :_dec_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_7, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_5, nil, [:seq, "-", :_dec_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_dec_val_8, nil, [:plus, :DIGIT]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:hex_val, nil, [:seq, "x", :_hex_val_1, :_hex_val_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_1, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_2, nil, [:opt, :_hex_val_3]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_3, nil, [:alt, :_hex_val_4, :_hex_val_5]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_4, nil, [:plus, :_hex_val_6]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_6, nil, [:seq, ".", :_hex_val_7]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_7, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_5, nil, [:seq, "-", :_hex_val_8]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_hex_val_8, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:prose_val, nil, [:seq, "<", :_prose_val_1, ">"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_1, nil, [:star, :_prose_val_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_2, nil, [:range, "#x20-#x3D#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CRLF, nil, [:seq, :_CRLF_1, :LF], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CRLF_1, nil, [:opt, :CR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CTL, nil, [:alt, :_CTL_1, :_CTL_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CTL_1, nil, [:range, "#x00-#x1F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LWSP_1, nil, [:alt, :WSP, :_LWSP_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LWSP_2, nil, [:seq, :CRLF, :WSP], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:OCTET, nil, [:range, "#x00-#xFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:SP, nil, [:hex, "#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:VCHAR, nil, [:range, "#x21-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:WSP, nil, [:alt, :SP, :HTAB], kind: :terminal).extend(EBNF::PEG::Rule), + ] +end + diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 
5566e5e..f4b5d54 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -118,7 +118,7 @@ class Base # # @param [#read, #to_s] input # @param [Symbol] format (:ebnf) - # Format of input, one of :ebnf, or :sxp + # Format of input, one of :abnf, :ebnf, or :sxp # @param [Hash{Symbol => Object}] options # @option options [Boolean, Array] :debug # Output debug information to an array or $stdout. @@ -156,6 +156,9 @@ def initialize(input, format: :ebnf, **options) @ast << rule end end + when :abnf + abnf = ABNF.new(input, **options) + @ast = abnf.ast else raise "unknown input format #{format.inspect}" end @@ -209,16 +212,20 @@ def to_sxp ## # Output formatted EBNF + # + # @param [:abnf, :ebnf] format (:ebnf) # @return [String] - def to_s - Writer.string(*ast) + def to_s(format: :ebnf) + Writer.string(*ast, format: format) end ## # Output formatted EBNF as HTML + # + # @param [:abnf, :ebnf] format (:ebnf) # @return [String] - def to_html - Writer.html(*ast) + def to_html(format: :ebnf) + Writer.html(*ast, format: format) end ## diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index ba5aabc..d3b6d03 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -12,11 +12,12 @@ class Writer # Format rules to a String # # @param [Array] rules + # @param [:abnf, :ebnf] format (:ebnf) # @return [Object] - def self.string(*rules) + def self.string(*rules, format: :ebnf) require 'stringio' unless defined?(StringIO) buf = StringIO.new - write(buf, *rules) + write(buf, *rules, format: format) buf.string end @@ -24,9 +25,10 @@ def self.string(*rules) # Format rules to $stdout # # @param [Array] rules + # @param [:abnf, :ebnf] format (:ebnf) # @return [Object] - def self.print(*rules) - write($stdout, *rules) + def self.print(*rules, format: :ebnf) + write($stdout, *rules, format: format) end ## @@ -34,20 +36,22 @@ def self.print(*rules) # # @param [Object] out # @param [Array] rules + # @param [:abnf, :ebnf] format (:ebnf) # @return [Object] - def self.write(out, *rules) - 
Writer.new(rules, out: out) + def self.write(out, *rules, format: :ebnf) + Writer.new(rules, out: out, format: format) end ## # Write formatted rules to an IO like object as HTML # # @param [Array] rules + # @param [:abnf, :ebnf] format (:ebnf) # @return [Object] - def self.html(*rules) + def self.html(*rules, format: :ebnf) require 'stringio' unless defined?(StringIO) buf = StringIO.new - Writer.new(rules, out: buf, html: true) + Writer.new(rules, out: buf, html: true, format: format) buf.string end @@ -55,10 +59,12 @@ def self.html(*rules) # @param [Array] rules # @param [Hash{Symbol => Object}] options # @param [#write] out ($stdout) + # @param [:abnf, :ebnf] format (:ebnf) # @option options [Symbol] format # @option options [Boolean] html (false) - def initialize(rules, out: $stdout, html: false, **options) + def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) @options = options.dup + return if rules.empty? # Determine max LHS length max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length @@ -75,9 +81,15 @@ def initialize(rules, out: $stdout, html: false, **options) # Output as formatted HTML begin require 'haml' - hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule| - formatted_expr = format(rule.expr) - formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr + hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules, format: format) do |rule| + case format + when :abnf + formatted_expr = format_abnf(rule.expr) + formatted_expr.length > rhs_length ? format_abnf(rule.expr, sep: "\n") : formatted_expr + when :ebnf + formatted_expr = format_ebnf(rule.expr) + formatted_expr.length > rhs_length ? 
format_ebnf(rule.expr, sep: "\n") : formatted_expr + end end out.write hout return @@ -93,23 +105,37 @@ def initialize(rules, out: $stdout, html: false, **options) else lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym} end - formatted_expr = format(rule.expr) - if formatted_expr.length > rhs_length - buffer << format(rule.expr, ("\n" + " " * lhs_length)) - else - buffer << formatted_expr + if format == :abnf + formatted_expr = format_abnf(rule.expr) + if formatted_expr.length > rhs_length + buffer << format_abnf(rule.expr, sep: ("\n" + " " * lhs_length)) + else + buffer << formatted_expr + end + elsif format == :ebnf + formatted_expr = format_ebnf(rule.expr) + if formatted_expr.length > rhs_length + buffer << format_ebnf(rule.expr, sep: ("\n" + " " * lhs_length)) + else + buffer << formatted_expr + end end out.puts(buffer) end end protected + + ## + # W3C EBNF Formatters + ## + # Format the expression part of a rule - def format(expr, sep = nil) + def format_ebnf(expr, sep: nil, embedded: false) return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) if expr.length == 1 - return format_char(expr) + return format_ebnf_char(expr) elsif expr =~ /\A#x\h+/ return (@options[:html] ? %(#{expr}) : expr) elsif expr =~ /"/ @@ -129,28 +155,63 @@ def format(expr, sep = nil) rparen = (@options[:html] ? ") " : ")") case expr.first + when :istr + # Looses fidelity, but, oh well ... + format_ebnf(expr.last, embedded: true) when :alt, :diff this_sep = (sep ? sep : " ") + parts[expr.first.to_sym] - expr[1..-1].map {|e| format(e)}.join(this_sep) + res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep) + embedded ? (lparen + res + rparen) : res when :star, :plus, :opt raise "Expected star expression to have a single operand" unless expr.length == 2 char = parts[expr.first.to_sym] - r = format(expr[1]) - (r.start_with?("(") || Array(expr[1]).length == 1) ? 
"#{r}#{char}" : "(#{r})#{char}" + r = format_ebnf(expr[1], embedded: true) + "#{r}#{char}" when :hex (@options[:html] ? %(#{expr.last}) : expr.last) when :range - format_range(expr.last) + format_ebnf_range(expr.last) when :seq this_sep = (sep ? sep : " ") - expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep) + res = expr[1..-1].map do |e| + format_ebnf(e, embedded: true) + end.join(this_sep) + embedded ? (lparen + res + rparen) : res + when :rept + # Expand repetition + min, max, value = expr[1..-1] + if min == 0 && max == 1 + format_ebnf([:opt, value], sep: sep, embedded: embedded) + elsif min == 0 && max == '*' + format_ebnf([:star, value], sep: sep, embedded: embedded) + elsif min == 1 && max == '*' + format_ebnf([:plus, value], sep: sep, embedded: embedded) + else + val2 = [:seq] + while min > 0 + val2 << value + min -= 1 + max -= 1 unless max == '*' + end + if max == '*' + val2 << [:star, value] + else + opt = nil + while max > 0 + opt = [:opt, opt ? [:seq, value, opt] : value] + max -= 1 + end + val2 << opt if opt + end + format_ebnf(val2, sep: sep, embedded: embedded) + end else raise "Unknown operator: #{expr.first}" end end # Format a single-character string, prefering hex for non-main ASCII - def format_char(c) + def format_ebnf_char(c) case c.ord when 0x22 then (@options[:html] ? %('"') : %{'"'}) when (0x23..0x7e) then (@options[:html] ? %("#{c}") : %{"#{c}"}) @@ -159,7 +220,7 @@ def format_char(c) end # Format a range - def format_range(string) + def format_ebnf_range(string) lbrac = (@options[:html] ? "[ " : "[") rbrac = (@options[:html] ? "] " : "]") dash = (@options[:html] ? "- " : "-") @@ -209,13 +270,17 @@ def escape_hex(u) - rules.each do |rule| %tr{id: "grammar-production-#{rule.sym}"} - if rule.pass? - %td{colspan: 3} + %td{colspan: (rule.id ? 
3 : 2)} %code<="@pass" - else - %td<= "[#{rule.id}]" + - if rule.id + %td<= "[#{rule.id}]" %td< %code<= rule.sym - %td<= "::=" + - if format == :ebnf + %td<= "::=" + - else + %td<= "=" %td != yield rule ).gsub(/^ /, '') diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 9479d6e..cc18919 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -3,9 +3,60 @@ require 'spec_helper' require 'ebnf' require 'sxp' +require 'nokogiri' describe EBNF::Writer do + RSpec::Matchers.define :have_xpath do |path, value| + match do |actual| + doc = Nokogiri::HTML.parse(actual) + return false unless doc + @result = doc.at_xpath(path.to_s) rescue false + case value + when false + @result.nil? + when true + !@result.nil? + when Array + @result.to_s.split(" ").include?(*value) + when Regexp + @result.to_s =~ value + else + @result.to_s == value + end + end + + failure_message do |actual| + msg = "expected that #{path.inspect}\nwould be: #{value.inspect}" + msg += "\n was: #{@result}" + msg += "\nsource:" + actual + msg + end + + failure_message_when_negated do |actual| + msg = "expected that #{path.inspect}\nwould not be #{value.inspect}" + msg += "\nsource:" + actual + msg + end + end + describe "#initialize" do + { + prolog: [ + %{[2] Prolog ::= BaseDecl? PrefixDecl*}, + %{[2] Prolog ::= BaseDecl? PrefixDecl*\n} + ], + }.each do |title, (grammar, plain)| + context title do + subject {EBNF::Base.new(grammar).ast} + + it "generates plain" do + expect {EBNF::Writer.new(subject)}.to write(plain).to(:output) + end + end + end + end + + describe ".string" do { prolog: [ %{[2] Prolog ::= BaseDecl? PrefixDecl*}, @@ -39,6 +90,102 @@ end end + describe ".html" do + { + prolog: [ + %{[2] Prolog ::= BaseDecl? 
PrefixDecl*}, + { + '//table/@class': "grammar", + '//table/tbody/@id': "grammar-productions", + '//tbody/tr/@id': "grammar-production-Prolog", + '//tbody/tr/td[1]/text()': "[2]", + '//tbody/tr/td[2]/code/text()': "Prolog", + '//tbody/tr/td[3]/text()': "::=", + '//tbody/tr/td[4]/text()': /BaseDecl\? PrefixDecl\*/, + } + ], + }.each do |title, (grammar, xpaths)| + context title do + subject {EBNF::Writer.html(*EBNF::Base.new(grammar).ast)} + + xpaths.each do |path, value| + specify {is_expected.to have_xpath(path, value)} + end + end + end + end + + describe "#format_ebnf" do + subject {EBNF::Writer.new([])} + + { + "alt": [ + [:alt, :A, :B], + "A | B" + ], + "hex": [ + [:hex, "#x20"], + "#x20" + ], + "istr": [ + [:istr, "foo"], + %("foo") + ], + "opt": [ + [:opt, :A], + "A?" + ], + "plus": [ + [:plus, :A], + "A+" + ], + "range": [ + [:range, "a-z"], + "[a-z]" + ], + "rept 0 1": [ + [:rept, 0, 1, :A], + "A?" + ], + "rept 0 *": [ + [:rept, 0, '*', :A], + "A*" + ], + "rept 1 1": [ + [:rept, 1, 1, :A], + "A" + ], + "rept 1 *": [ + [:rept, 1, '*', :A], + "A+" + ], + "rept 1 2": [ + [:rept, 1, 2, :A], + "A A?" + ], + "rept 1 3": [ + [:rept, 1, 3, :A], + "A (A A?)?" + ], + "rept 1 3 (A B)": [ + [:rept, 1, 3, [:seq, :A, :B]], + "(A B) ((A B) (A B)?)?" + ], + "rept 1 3 (A | B)": [ + [:rept, 1, 3, [:alt, :A, :B]], + "(A | B) ((A | B) (A | B)?)?" + ], + "star": [ + [:star, :A], + "A*" + ], + }.each do |title, (expr, result)| + it title do + expect(subject.send(:format_ebnf, expr)).to eql result + end + end + end + context "Existing grammars" do { "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__), From 10a123fecb4ae69f5e286e56c0ab5f2f46652a96 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 1 Jul 2020 14:04:44 -0700 Subject: [PATCH 17/50] Exclude a rule spec for JRuby. 
--- spec/rule_spec.rb | 9 ++++++++- spec/spec_helper.rb | 14 ++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index f17bd1a..affceaa 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -498,7 +498,6 @@ describe "#to_regexp" do { hex: ["#x20", / /], - istr: ["foo", /foo/ui], range: ["a-b", /[a-b]/], }.each do |title, (exp, regexp)| it title do @@ -506,6 +505,14 @@ end end + { + istr: ["foo", /foo/ui], + }.each do |title, (exp, regexp)| + it title, ruby: "!jruby" do + expect(EBNF::Rule.new(title, nil, [title, exp]).to_regexp).to eql regexp + end + end + it "raises an error for other operation" do expect {EBNF::Rule.new(:seq, nil, [:seq, :a]).to_regexp}.to raise_error(/Can't turn/) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index bcad127..10837a5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -24,10 +24,16 @@ ::RSpec.configure do |c| c.filter_run focus: true c.run_all_when_everything_filtered = true - c.exclusion_filter = { - ruby: lambda { |version| !(RUBY_VERSION.to_s =~ /^#{version.to_s}/) }, - not_jruby: lambda { RUBY_PLATFORM.to_s != 'jruby'} - } + c.filter_run_excluding ruby: ->(version) do + case version.to_s + when "!jruby" + RUBY_ENGINE == "jruby" + when /^> (.*)/ + !(RUBY_VERSION.to_s > $1) + else + !(RUBY_VERSION.to_s =~ /^#{version.to_s}/) + end + end end require 'ebnf' From 94fcbe24e68d58b29da363de6780297d67baa612 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 3 Jul 2020 12:52:06 -0700 Subject: [PATCH 18/50] Writer formatting whitespace when crossing lines. 
--- lib/ebnf/writer.rb | 151 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 13 deletions(-) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index d3b6d03..b4d0d20 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -69,9 +69,9 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) # Determine max LHS length max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length - lhs_length = max_sym + 3 - lhs_fmt = "%-#{max_sym}s ::= " - if max_id > 0 + lhs_length = max_sym + 1 + lhs_fmt = "%-#{max_sym}s #{format == :ebnf ? '::=' : '='} " + if format == :ebnf && max_id > 0 lhs_fmt = "%-#{max_id+2}s " + lhs_fmt lhs_length += max_id + 3 end @@ -108,14 +108,16 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) if format == :abnf formatted_expr = format_abnf(rule.expr) if formatted_expr.length > rhs_length - buffer << format_abnf(rule.expr, sep: ("\n" + " " * lhs_length)) + # Space out past "= " + buffer << format_abnf(rule.expr, sep: ("\n" + " " * (lhs_length + 2))) else + # Space out past "::= " buffer << formatted_expr end elsif format == :ebnf formatted_expr = format_ebnf(rule.expr) if formatted_expr.length > rhs_length - buffer << format_ebnf(rule.expr, sep: ("\n" + " " * lhs_length)) + buffer << format_ebnf(rule.expr, sep: ("\n" + " " * (lhs_length + 4))) else buffer << formatted_expr end @@ -139,9 +141,9 @@ def format_ebnf(expr, sep: nil, embedded: false) elsif expr =~ /\A#x\h+/ return (@options[:html] ? %(#{expr}) : expr) elsif expr =~ /"/ - return (@options[:html] ? %('#{escape(expr, "'")}') : %('#{escape(expr, "'")}')) + return (@options[:html] ? %('#{escape_ebnf(expr, "'")}') : %('#{escape_ebnf(expr, "'")}')) else - return (@options[:html] ? %("#{escape(expr, '"')}") : %("#{escape(expr, '"')}")) + return (@options[:html] ? 
%("#{escape_ebnf(expr, '"')}") : %("#{escape_ebnf(expr, '"')}")) end end parts = { @@ -215,7 +217,7 @@ def format_ebnf_char(c) case c.ord when 0x22 then (@options[:html] ? %('"') : %{'"'}) when (0x23..0x7e) then (@options[:html] ? %("#{c}") : %{"#{c}"}) - else (@options[:html] ? %(#{escape_hex(c)}) : escape_hex(c)) + else (@options[:html] ? %(#{escape_ebnf_hex(c)}) : escape_ebnf_hex(c)) end end @@ -236,14 +238,14 @@ def format_ebnf_range(string) when s.scan(/\A-/) buffer << dash else - buffer << (@options[:html] ? %(#{escape_hex(s.getch)}) : escape_hex(s.getch)) + buffer << (@options[:html] ? %(#{escape_ebnf_hex(s.getch)}) : escape_ebnf_hex(s.getch)) end end buffer + rbrac end # Escape a string, using as many UTF-8 characters as possible - def escape(string, quote = '"') + def escape_ebnf(string, quote = '"') buffer = "" string.each_char do |c| buffer << case (u = c.ord) @@ -255,7 +257,7 @@ def escape(string, quote = '"') buffer end - def escape_hex(u) + def escape_ebnf_hex(u) fmt = case u.ord when 0x0000..0x00ff then "#x%02X" when 0x0100..0xffff then "#x%04X" @@ -264,16 +266,139 @@ def escape_hex(u) sprintf(fmt, u.ord) end + ## + # ABNF Formatters + ## + + # Format the expression part of a rule + def format_abnf(expr, sep: nil, embedded: false, sensitive: true) + return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) + if expr.is_a?(String) + if expr.length == 1 + return format_abnf_char(expr) + elsif expr =~ /"/ + # Split into segments + segments = expr.split('"') + + return format_abnf_char(expr) if segments.empty? + + seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1] + seq.unshift(:seq) + return format_abnf(seq, sep: nil, embedded: false) + else + return (@options[:html] ? %("#{'%s' if sensitive}#{expr}") : %(#{'%s' if sensitive}"#{expr}")) + end + end + parts = { + alt: (@options[:html] ? "| " : "| "), + star: (@options[:html] ? "* " : "*"), + plus: (@options[:html] ? "+ " : "1*"), + opt: (@options[:html] ? "? 
" : "?") + } + lbrac = (@options[:html] ? "[ " : "[") + rbrac = (@options[:html] ? "] " : "]") + lparen = (@options[:html] ? "( " : "(") + rparen = (@options[:html] ? ") " : ")") + + case expr.first + when :istr + # FIXME: if string part is segmented, need to do something different + format_abnf(expr.last, embedded: true, sensitive: false) + when :alt + this_sep = (sep ? sep : " ") + parts[expr.first.to_sym] + res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep) + embedded ? (lparen + res + rparen) : res + when :diff + raise "ABNF does not support the diff operator" + when :opt + char = parts[expr.first.to_sym] + r = format_abnf(expr[1], embedded: true) + "#{lbrac}#{r}#{rbrac}" + when :plus, :star + char = parts[expr.first.to_sym] + r = format_abnf(expr[1], embedded: true) + "#{char}#{r}" + when :hex + hex = expr.last.sub('#', '%') + (@options[:html] ? %(#{hex}) : hex) + when :range + format_abnf_range(expr.last) + when :seq + this_sep = (sep ? sep : " ") + res = expr[1..-1].map do |e| + format_abnf(e, embedded: true) + end.join(this_sep) + embedded ? (lparen + res + rparen) : res + when :rept + # Expand repetition + min, max, value = expr[1..-1] + r = format_abnf(value, embedded: true) + if min == max + "#{min}#{r}" + elsif min == 0 && max == '*' + "#{parts[:star]}#{r}" + elsif min > 0 && max == '*' + "#{min}#{parts[:star]}#{r}" + else + "#{min}#{parts[:star]}#{max}#{r}" + end + else + raise "Unknown operator: #{expr.first}" + end + end + + # Format a single-character string, prefering hex for non-main ASCII + def format_abnf_char(c) + (@options[:html] ? %(#{escape_abnf_hex(c)}) : escape_abnf_hex(c)) + end + + # Format a range + def format_abnf_range(string) + #require 'byebug'; byebug + if string.include?('-') + # Might include multiple ranges + # #x01-#x03#x05-#x06 + # a-bc-d + dash = (@options[:html] ? 
"- " : "-") + # Split into separate range segments + if string.start_with?('#x') + ranges = [] + scanner = StringScanner.new(string) + while !scanner.eos? + ranges << scanner.scan(/#x\h+-#x\h+/) + end + ranges.map {|range|"%x" + range.gsub('#x', '')}.join(" / ") + else + '%d' + string.gsub(/[^-]/) {|c| c.ord} + end + else + if string.start_with?('#x') + "%x" + string.split('#x').join('.') + else + "%d" + string.chars.map(&:ord).join(".") + end + end + end + + def escape_abnf_hex(u) + fmt = case u.ord + when 0x0000..0x00ff then "%02X" + when 0x0100..0xffff then "%04X" + else "%08X" + end + "%x" + (fmt % u.ord) + end + HAML_DESC = %q( %table.grammar %tbody#grammar-productions - rules.each do |rule| %tr{id: "grammar-production-#{rule.sym}"} - if rule.pass? - %td{colspan: (rule.id ? 3 : 2)} + %td{colspan: (format == :ebnf && rule.id ? 3 : 2)} %code<="@pass" - else - - if rule.id + - if format == :ebnf && rule.id %td<= "[#{rule.id}]" %td< %code<= rule.sym From 58eaa37686462f47fd0504abe87d67b857cc1c07 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 3 Jul 2020 12:52:43 -0700 Subject: [PATCH 19/50] Fix EBNF `PASS` terminal for actual whitespace characters. --- etc/ebnf.ebnf | 7 +++---- etc/ebnf.html | 14 +++++++------- etc/ebnf.ll1.sxp | 4 ++-- etc/ebnf.peg.rb | 4 ++-- etc/ebnf.peg.sxp | 4 ++-- etc/ebnf.sxp | 4 ++-- spec/rule_spec.rb | 2 +- 7 files changed, 19 insertions(+), 20 deletions(-) diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index a76a55a..0c9b54e 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -55,14 +55,13 @@ [21] R_CHAR ::= CHAR - ']' - # Should be able to do this inline, but not until terminal regular expressions are created automatically [22] POSTFIX ::= [?*+] - [23] PASS ::= ( [#x00-#x20] - | ( ('#' - '#x') | '//' ) [^#x0A#x0Dx]* + # Ignore all whitespace and comments between non-terminals + [23] PASS ::= ( [#x9#xA#xD#x20] + | ( ('#' - '#x') | '//' ) [^#xA#xD]* | '/*' (( '*' [^/] )? | [^*] )* '*/' | '(*' (( '*' [^)] )? 
| [^*] )* '*)' )+ - # Should be able to do this inline, but not until terminal regular expressions are created automatically @pass PASS diff --git a/etc/ebnf.html b/etc/ebnf.html index 0946fc6..f2b44da 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -77,7 +77,7 @@ | O_RANGE | STRING1 | STRING2 -| "(" expression ")" +| ("(" expression ")") @@ -117,7 +117,7 @@ ENUM ::= -"[" R_CHAR+ | HEX+ "]" - LHS +(("[" R_CHAR+) | (HEX+ "]")) - LHS @@ -125,7 +125,7 @@ O_ENUM ::= -"[^" R_CHAR+ | HEX+ "]" +("[^" R_CHAR+) | (HEX+ "]") @@ -133,7 +133,7 @@ RANGE ::= -"[" (R_CHAR "-" R_CHAR | HEX "-" HEX)+ "]" +"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" @@ -141,7 +141,7 @@ O_RANGE ::= -"[^" (R_CHAR "-" R_CHAR | HEX "-" HEX)+ "]" +"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" @@ -192,11 +192,11 @@ PASS ::= -([#x00-#x20] | ("#" - "#x" | "//") ([^#x0A#x0Dx])* | "/*" ("*" [^/])? | [^*]* "*/" | "(*" ("*" [^)])? | [^*]* "*)")+ +([#x9#xA#xD#x20] | ((("#" - "#x") | "//") [^#xA#xD]*) | ("/*" (("*" [^/])? | [^*])* "*/") | ("(*" (("*" [^)])? 
| [^*])* "*)"))+ - + @pass diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 65408e7..9395c05 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -174,7 +174,7 @@ (terminal PASS "23" (plus (alt - (range "#x00-#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 575975c..7fc782a 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -71,12 +71,12 @@ module Meta EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x00-#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#x0A#x0Dx"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_11, 
"23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index df6697b..8637a8e 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -80,7 +80,7 @@ (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) (terminal _PASS_18 "23.18" (opt _PASS_20)) (terminal _PASS_19 "23.19" (range "^*")) - (terminal _PASS_2 "23.2" (range "#x00-#x20")) + (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) (terminal _PASS_21 "23.21" (range "^)")) (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) @@ -89,4 +89,4 @@ (terminal _PASS_6 "23.6" (alt _PASS_8 "//")) (terminal _PASS_7 "23.7" (star _PASS_9)) (terminal _PASS_8 "23.8" (diff "#" "#x")) - (terminal _PASS_9 "23.9" (range "^#x0A#x0Dx"))) + (terminal _PASS_9 "23.9" (range "^#xA#xD"))) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 1d4d831..7841756 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -32,7 +32,7 @@ (terminal PASS "23" (plus (alt - (range "#x00-#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index affceaa..9ee047c 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -921,7 +921,7 @@ CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"], R_CHAR: [:CHAR, "]"], POSTFIX: ["?*+"], - PASS: ["#x00-#x20", "#", "#x", "//", "/*", "(*"] + PASS: ["#x9#xA#xD#x20", "#", "#x", "//", "/*", "(*"] }.each do |sym, expected| it "#{sym} => #{expected.inspect}" do res = subject.ast.find {|r| r.sym == sym} From 9375cdf00877e526c0d760ea406fdc14d3060ecc Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 3 Jul 2020 12:55:37 -0700 Subject: [PATCH 20/50] Add comments to iso-ebnf grammar, add `comment` 
production, and include in `@pass`. --- examples/isoebnf/examples/iso-ebnf.isoebnf | 64 +++++++++++----- examples/isoebnf/iso-ebnf.ebnf | 56 ++++++++++---- examples/isoebnf/iso-ebnf.peg.sxp | 86 +++++++++++----------- examples/isoebnf/iso-ebnf.sxp | 16 ++-- examples/isoebnf/meta.rb | 50 +++++++------ 5 files changed, 166 insertions(+), 106 deletions(-) diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf index 7ae2aa3..90084f1 100644 --- a/examples/isoebnf/examples/iso-ebnf.isoebnf +++ b/examples/isoebnf/examples/iso-ebnf.isoebnf @@ -1,19 +1,29 @@ (* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *) (* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *) -syntax = {syntax_rule} ; +syntax = syntax_rule, {syntax_rule} ; -syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol ; +syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol + (* A defines the sequences of + symbols represented by a *); -definitions_list = single_definition, {definition_separator_symbol, definitions_list} ; +definitions_list = single_definition, {definition_separator_symbol, definitions_list} + (* | separates alternative *); -single_definition = term, {',', term} ; +single_definition = term, {',', term} + (* , separates successive *); -term = factor, ['-', exception] ; +term = factor, ['-', exception] + (* A represents any sequence of symbols that is defined by the but + not defined by the *); -exception = factor ; +exception = factor + (* A may be used as an + if it could be replaced by a + containingno *); -factor = [integer, '*'], primary ; +factor = [integer, '*'], primary + (* The specifies the number of repetitions of the *); primary = optional_sequence | repeated_sequence @@ -24,11 +34,35 @@ primary = optional_sequence | empty ; -optional_sequence = start_option_symbol, definitions_list, end_option_symbol ; +optional_sequence = start_option_symbol, definitions_list, 
end_option_symbol + (* The brackets [ and ] enclose symbols which are optional *); -repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol ; +repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol + (* The brackets { and } enclose symbols + which may be repeated any number of times *); -grouped_sequence = '(', definitions_list, ')' ; +grouped_sequence = '(', definitions_list, ')' + (* The brackets ( and ) allow any to be a *); + +terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") + | ('"', second_terminal_character, {second_terminal_character}, '"') + (* A represents the + between the quote symbols ’_’ or "_" *); + +meta_identifier = letter, {meta_identifier_character} + (* A is the name of a syntactic element of the language being defined *); + +integer = decimal_digit, {decimal_digit} ; + +special_sequence = '?', {special_sequence_character}, '?' + (* The meaning of a is not defined in the standard metalanguage. *); + +comment = ’(*’, {comment_symbol}, ’*)’ + (* A comment is allowed anywhere outside a + , , + or *); + +comment_symbol = comment | terminal_string | special_sequence | character; letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" @@ -42,23 +76,13 @@ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; -integer = decimal_digit, {decimal_digit} ; - -meta_identifier = letter, {meta_identifier_character} ; - (* Extended to allow '_' *) meta_identifier_character = letter | decimal_digit | '_' ; - -terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") - | ('"', second_terminal_character, {second_terminal_character}, '"') - ; first_terminal_character = terminal_character - "'" ; second_terminal_character = terminal_character - '"' ; -special_sequence = '?', {special_sequence_character}, '?' 
; - special_sequence_character = terminal_character - '?' ; terminal_character = letter diff --git a/examples/isoebnf/iso-ebnf.ebnf b/examples/isoebnf/iso-ebnf.ebnf index 78157b8..05d6481 100644 --- a/examples/isoebnf/iso-ebnf.ebnf +++ b/examples/isoebnf/iso-ebnf.ebnf @@ -1,19 +1,30 @@ # W3C EBNF for ISO/IEC 14977 : 1996 EBNF # (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) +# Extended to allow no syntax_rule to be valid. syntax ::= syntax_rule* - + syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol - + (* A defines the sequences of + symbols represented by a *) + definitions_list ::= single_definition (definition_separator_symbol definitions_list)* - + (* | separates alternative *) + single_definition ::= term (',' term)* - + (* , separates successive *) + term ::= factor ('-' exception)? - + (* A represents any sequence of symbols that is defined by the but + not defined by the *) + exception ::= factor - + (* A may be used as an + if it could be replaced by a + containingno *) + factor ::= (integer '*')? primary + (* The specifies the number of repetitions of the *) primary ::= optional_sequence | repeated_sequence @@ -24,35 +35,50 @@ primary ::= optional_sequence | empty optional_sequence ::= start_option_symbol definitions_list end_option_symbol + (* The brackets [ and ] enclose symbols which are optional *) repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol + (* The brackets { and } enclose symbols + which may be repeated any number of times *) grouped_sequence ::= '(' definitions_list ')' + (* The brackets ( and ) allow any to be a *) # Note, the following are nominally terminal rules, # although ISO EBNF does not really distinguish between non-terminal and terminal rules. 
@terminals -letter ::= [a-zA-Z] -decimal_digit ::= [0-9] +terminal_string ::= ("'" first_terminal_character+ "'") + | ('"' second_terminal_character+ '"') + (* A represents the + between the quote symbols '_' or "_" *) + +meta_identifier ::= letter meta_identifier_character* + (* A is the name of a syntactic element of the language being defined *) integer ::= decimal_digit+ -meta_identifier ::= letter meta_identifier_character* +special_sequence ::= '?' special_sequence_character* '?' + (* The meaning of a is not defined in the standard metalanguage. *) + +comment ::= start_comment_symbol comment_symbol* end_comment_symbol + (* A comment is allowed anywhere outside a + , , + or *) + +comment_symbol ::= comment | terminal_string | special_sequence | character + +letter ::= [a-zA-Z] +decimal_digit ::= [0-9] # Extended to allow '_' meta_identifier_character ::= letter | decimal_digit | '_' - -terminal_string ::= ("'" first_terminal_character+ "'") - | ('"' second_terminal_character+ '"') first_terminal_character ::= terminal_character - "'" second_terminal_character ::= terminal_character - '"' -special_sequence ::= '?' special_sequence_character* '?' - special_sequence_character ::= terminal_character - '?' 
terminal_character ::= letter @@ -80,7 +106,7 @@ other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' gap_separator ::= [#x9#xa#xb#xc#xd#x20] -@pass gap_separator+ +@pass gap_separator+ | comment empty ::= '' diff --git a/examples/isoebnf/iso-ebnf.peg.sxp b/examples/isoebnf/iso-ebnf.peg.sxp index 3900ccc..691c9ba 100644 --- a/examples/isoebnf/iso-ebnf.peg.sxp +++ b/examples/isoebnf/iso-ebnf.peg.sxp @@ -3,28 +3,18 @@ (rule syntax_rule (seq meta_identifier defining_symbol definitions_list terminator_symbol)) (rule definitions_list (seq single_definition _definitions_list_1)) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) - (terminal repetition_symbol (seq "*")) - (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) - (terminal first_quote_symbol (seq "'")) - (terminal second_quote_symbol (seq "\"")) - (terminal start_comment_symbol (seq "(*")) - (terminal end_comment_symbol (seq "*)")) - (terminal start_group_symbol (seq "(")) - (terminal end_group_symbol (seq ")")) - (rule syntax (star syntax_rule)) + (rule _definitions_list_1 (star _definitions_list_2)) + (rule _definitions_list_2 (seq definition_separator_symbol definitions_list)) (rule single_definition (seq term _single_definition_1)) + (rule _single_definition_1 (star _single_definition_2)) + (rule _single_definition_2 (seq "," term)) (rule term (seq factor _term_1)) + (rule _term_1 (opt _term_2)) + (rule _term_2 (seq "-" exception)) (rule exception (seq factor)) (rule factor (seq _factor_1 primary)) + (rule _factor_1 (opt _factor_2)) + (rule _factor_2 (seq integer "*")) (rule primary (alt optional_sequence repeated_sequence 
special_sequence grouped_sequence meta_identifier terminal_string empty )) @@ -33,15 +23,24 @@ (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal terminal_string (alt _terminal_string_1 _terminal_string_2)) + (rule _terminal_string_1 (seq "'" _terminal_string_3 "'")) + (rule _terminal_string_3 (plus first_terminal_character)) + (rule _terminal_string_2 (seq "\"" _terminal_string_4 "\"")) + (rule _terminal_string_4 (plus second_terminal_character)) + (terminal meta_identifier (seq letter _meta_identifier_1)) + (rule _meta_identifier_1 (star meta_identifier_character)) + (terminal integer (plus decimal_digit)) + (terminal special_sequence (seq "?" _special_sequence_1 "?")) + (rule _special_sequence_1 (star special_sequence_character)) + (terminal comment (seq start_comment_symbol _comment_1 end_comment_symbol)) + (rule _comment_1 (star comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) (terminal letter (range "a-zA-Z")) (terminal decimal_digit (range "0-9")) - (terminal integer (plus decimal_digit)) - (terminal meta_identifier (seq letter _meta_identifier_1)) (terminal meta_identifier_character (alt letter decimal_digit "_")) - (terminal terminal_string (alt _terminal_string_1 _terminal_string_2)) (terminal first_terminal_character (diff terminal_character "'")) (terminal second_terminal_character (diff terminal_character "\"")) - (terminal special_sequence (seq "?" 
_special_sequence_1 "?")) (terminal special_sequence_character (diff terminal_character "?")) (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol @@ -51,24 +50,29 @@ start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) (terminal other_character (alt _other_character_1 "\\")) + (terminal _other_character_1 (range ":+_%@&$<>^` ̃#x20#x23")) (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) - (pass _pass (plus gap_separator)) + (pass _pass (alt __pass_1 comment)) + (rule __pass_1 (plus gap_separator)) (terminal empty (seq ())) - (rule _definitions_list_1 ".1" (star _definitions_list_2)) - (rule _gap_free_symbol_1 ".1" (seq _gap_free_symbol_3 terminal_character)) - (rule _single_definition_1 ".1" (star _single_definition_2)) - (terminal _other_character_1 ".1" (range ":+_%@&$<>^` ̃#x20#x23")) - (rule _term_1 ".1" (opt _term_2)) - (rule _meta_identifier_1 ".1" (star meta_identifier_character)) - (rule _special_sequence_1 ".1" (star special_sequence_character)) - (rule _factor_1 ".1" (opt _factor_2)) - (rule _terminal_string_1 ".1" (seq "'" _terminal_string_3 "'")) - (rule _terminal_string_2 ".2" (seq "\"" _terminal_string_4 "\"")) - (rule _definitions_list_2 ".2" (seq definition_separator_symbol definitions_list)) - (rule _single_definition_2 ".2" (seq "," term)) - (terminal _gap_free_symbol_2 ".2" (range "'\"")) - (rule _term_2 ".2" (seq "-" exception)) - (rule _factor_2 ".2" (seq integer "*")) - (rule _gap_free_symbol_3 ".3" (not _gap_free_symbol_2)) - (rule _terminal_string_3 ".3" (plus first_terminal_character)) - (rule _terminal_string_4 ".4" (plus second_terminal_character))) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) 
+ (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) + (rule _gap_free_symbol_1 (seq _gap_free_symbol_3 terminal_character)) + (rule _gap_free_symbol_3 (not _gap_free_symbol_2)) + (terminal _gap_free_symbol_2 (range "'\"")) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + (rule syntax (star syntax_rule))) diff --git a/examples/isoebnf/iso-ebnf.sxp b/examples/isoebnf/iso-ebnf.sxp index 4ef3718..d4abf3a 100644 --- a/examples/isoebnf/iso-ebnf.sxp +++ b/examples/isoebnf/iso-ebnf.sxp @@ -16,18 +16,20 @@ (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) - (terminal letter (range "a-zA-Z")) - (terminal decimal_digit (range "0-9")) - (terminal integer (plus decimal_digit)) - (terminal meta_identifier (seq letter (star meta_identifier_character))) - (terminal meta_identifier_character (alt letter decimal_digit "_")) (terminal terminal_string (alt (seq "'" (plus first_terminal_character) "'") (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal integer (plus decimal_digit)) + (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) + (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal meta_identifier_character (alt letter decimal_digit "_")) (terminal first_terminal_character (diff terminal_character "'")) (terminal second_terminal_character (diff terminal_character "\"")) - (terminal special_sequence (seq "?" (star special_sequence_character) "?")) (terminal special_sequence_character (diff terminal_character "?")) (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol @@ -38,7 +40,7 @@ start_repeat_symbol terminator_symbol other_character )) (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) - (pass _pass (plus gap_separator)) + (pass _pass (alt (plus gap_separator) comment)) (terminal empty (seq ())) (terminal defining_symbol (alt "=" ":")) (terminal definition_separator_symbol (alt "|" "/" "!")) diff --git a/examples/isoebnf/meta.rb b/examples/isoebnf/meta.rb index fa77c7e..3c943c6 100644 --- a/examples/isoebnf/meta.rb +++ b/examples/isoebnf/meta.rb @@ -5,43 +5,47 @@ module ISOEBNFMeta EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_definitions_list_1, ".1", [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_definitions_list_2, ".2", [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_single_definition_1, ".1", [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_single_definition_2, ".2", [:seq, ",", :term]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_term_1, ".1", [:opt, :_term_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_term_2, ".2", [:seq, "-", :exception]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_factor_1, ".1", [:opt, :_factor_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_factor_2, ".2", [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", 
:definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :terminal_string, :special_sequence, :character], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_meta_identifier_1, ".1", [:star, 
:meta_identifier_character]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_1, ".1", [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_3, ".3", [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_2, ".2", [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_terminal_string_4, ".4", [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_special_sequence_1, ".1", [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_other_character_1, 
".1", [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_pass, nil, [:plus, :gap_separator], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:empty, nil, [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -51,9 +55,9 @@ module ISOEBNFMeta EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_1, ".1", [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_3, ".3", [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_2, ".2", [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), 
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), From c48bb967e49daba5fdae0f44a52761540a546ef5 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 4 Jul 2020 16:45:17 -0700 Subject: [PATCH 21/50] ISO EBNF reader/writer. --- README.md | 46 +++++--- Rakefile | 57 +++------- bin/ebnf | 36 ++++--- etc/sparql.ebnf | 2 +- etc/sparql.sxp | 2 +- etc/turtle.ebnf | 2 +- etc/turtle.sxp | 2 +- lib/ebnf.rb | 1 + lib/ebnf/base.rb | 25 ++--- lib/ebnf/isoebnf.rb | 226 +++++++++++++++++++++++++++++++++++++++ lib/ebnf/isoebnf/meta.rb | 73 +++++++++++++ lib/ebnf/writer.rb | 204 +++++++++++++++++++++++++++++------ spec/parser_spec.rb | 8 +- 13 files changed, 560 insertions(+), 124 deletions(-) create mode 100644 lib/ebnf/isoebnf.rb create mode 100644 lib/ebnf/isoebnf/meta.rb diff --git a/README.md b/README.md index 0bb17ce..f4cfacb 100644 --- a/README.md +++ b/README.md @@ -41,42 +41,60 @@ See {EBNF::LL1} and {EBNF::LL1::Parser} for further information. 
require 'ebnf' - ebnf = EBNF.parse(File.open('./etc/ebnf.ebnf')) + grammar = EBNF.parse(File.open('./etc/ebnf.ebnf')) Output rules and terminals as [S-Expressions][S-Expression], [Turtle][], HTML or [BNF][] - puts ebnf.to_sxp - puts ebnf.to_ttl - puts ebnf.to_html - puts ebnf.to_s + puts grammar.to_sxp + puts grammar.to_ttl + puts grammar.to_html + puts grammar.to_s Transform [EBNF][] to [PEG][] (generates sub-rules for embedded expressions) and the RULES table as Ruby for parsing grammars: - ebnf.make_peg - ebnf.to_ruby + grammar.make_peg + grammar.to_ruby Transform [EBNF][] to [BNF][] (generates sub-rules using `alt` or `seq` from `plus`, `star` or `opt`) - ebnf.make_bnf + grammar.make_bnf Generate [First/Follow][] rules for BNF grammars (using "ebnf" as the starting production): - ebnf.first_follow(:ebnf) + grammar.first_follow(:ebnf) Generate Terminal, [First/Follow][], Cleanup and Branch tables as Ruby for parsing grammars: - ebnf.build_tables - ebnf.to_ruby + grammar.build_tables + grammar.to_ruby Generate formatted grammar using HTML (requires [Haml][Haml] gem): - ebnf.to_html + grammar.to_html -### Parser debugging +### Parsing an ISO/IEC 14977 Grammar + +The EBNF gem can also parse [ISO/EIC 14977] Grammars (ISOEBNF) to [S-Expressions][S-Expression]. + + grammar = EBNF.parse(File.open('./etc/iso-ebnf.isoebnf', format: :isoebnf)) + +### Parsing an ABNF Grammar + +The EBNF gem can also parse [ABNF] Grammars to [S-Expressions][S-Expression]. + + grammar = EBNF.parse(File.open('./etc/abnf.abnf', format: :abnf)) + +### Parser Debugging Inevitably while implementing a parser for some specific grammar, a developer will need greater insight into the operation of the parser. While this can involve sorting through a tremendous amount of data, the parser can be provided a [Logger][] instance which will output messages at varying levels of detail to document the state of the parser at any given point. 
Most useful is likely the `INFO` level of debugging, but even more detail is revealed using the `DEBUG` level. `WARN` and `ERROR` statements will typically also be provided as part of an exception if parsing fails, but can be shown in the context of other parsing state with appropriate indentation as part of the logger. -### Parser errors +### Writing Grammars + +The {EBNF::Writer} class can be used to write parsed grammars out, either as formatted text, or HTML. Because grammars are written from the Abstract Syntax Tree, represented as [S-Expressions][S-Expression], this provides a means of transforming between grammar formats (e.g., W3C [EBNF][] to [ABNF][]), although with some potential loss in semantic fidelity (case-insensitive string matching vs. case-sensitive matching). + +The formatted HTML results are designed to be appropriate for including in specifications. + +### Parser Errors On a parsing failure, and exception is raised with information that may be useful in determining the source of the error. 
## EBNF Grammar diff --git a/Rakefile b/Rakefile index c36e857..fdf3050 100755 --- a/Rakefile +++ b/Rakefile @@ -42,9 +42,10 @@ end namespace :etc do ETC_FILES = %w{ + etc/abnf.sxp etc/iso-ebnf.sxp etc/ebnf.sxp etc/ebnf.ll1.sxp etc/ebnf.peg.sxp etc/ebnf.html etc/ebnf.ll1.rb etc/ebnf.peg.rb - etc/turtle.sxp etc/turtle.ll1.sxp etc/turtle.peg.sxp etc/turtle.html etc/turtle.peg.rb etc/turtle.ll1.rb - etc/sparql.sxp etc/sparql.ll1.sxp etc/sparql.peg.sxp etc/sparql.html etc/sparql.peg.rb etc/turtle.ll1.rb + etc/turtle.sxp + etc/sparql.sxp } desc 'Remove generated files in etc' task :clean do @@ -55,6 +56,20 @@ namespace :etc do task build: ETC_FILES end +desc "Build meta files for ISO EBNF and ABNF" +task :meta => %w{lib/ebnf/abnf/meta.rb lib/ebnf/abnf/core.rb lib/ebnf/isoebnf/meta.rb} do + file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) + end + + file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) + end + + file "lib/ebnf/isoebnf/meta.rb" => "etc/iso-ebnf.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) + end +end # Build SXP output with leading space to allow for Markdown formatting. 
rule ".sxp" => %w{.ebnf} do |t| @@ -98,41 +113,3 @@ file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" %x(bin/ebnf --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) end - -file "etc/turtle.ll1.sxp" => "etc/turtle.ebnf" do |t| - puts "build #{t.name}" - File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --ll1 turtleDoc #{t.source})).each_line do |line| - f.puts ' ' + line - end - end -end - -file "etc/turtle.peg.rb" => "etc/turtle.ebnf" do |t| - puts "build #{t.name}" - %x(bin/ebnf --peg -f rb -o etc/turtle.peg.rb etc/turtle.ebnf) -end - -file "etc/turtle.ll1.rb" => "etc/turtle.ebnf" do |t| - puts "build #{t.name}" - %x(bin/ebnf --ll1 turtleDoc -f rb -o etc/turtle.ll1.rb etc/turtle.ebnf) -end - -file "etc/sparql.ll1.sxp" => "etc/sparql.ebnf" do |t| - puts "build #{t.name}" - File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --ll1 QueryUnit --ll1 UpdateUnit #{t.source})).each_line do |line| - f.puts ' ' + line - end - end -end - -file "etc/sparql.peg.rb" => "etc/sparql.ebnf" do |t| - puts "build #{t.name}" - %x(bin/ebnf --peg -f rb -o etc/sparql.peg.rb etc/sparql.ebnf) -end - -file "etc/sparql.ll1.rb" => "etc/sparql.ebnf" do |t| - puts "build #{t.name}" - %x(bin/ebnf --ll1 QueryUnit --ll1 UpdateUnit -f rb -o etc/sparql.ll1.rb etc/sparql.ebnf) -end diff --git a/bin/ebnf b/bin/ebnf index 2067dde..0462edc 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -24,8 +24,8 @@ OPT_ARGS = [ ["--bnf", GetoptLong::NO_ARGUMENT, "Transform EBNF to BNF"], ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"], ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], - ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, ttl, sxp, or rb"], - ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf or sxp"], + ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, 
isoebnf, isoebnfh, ttl, sxp, or rb"], + ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf isoebnf, or sxp"], ["--mod-name", GetoptLong::REQUIRED_ARGUMENT,"Module name used when creating ruby tables"], ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT,"Output to the specified file path"], ["--peg", GetoptLong::NO_ARGUMENT, "Transform EBNF to PEG"], @@ -57,8 +57,18 @@ opts.each do |opt, arg| when '--debug' then options[:debug] = true when '--bnf' then options[:bnf] = true when '--evaluate' then input = arg - when '--input-format' then options[:format] = arg.to_sym - when '--format' then options[:output_format] = arg.to_sym + when '--input-format' + unless %w(abnf ebnf isoebnf sxp).include?(arg) + STDERR.puts("unrecognized input format #{arg}") + usage + end + options[:format] = arg.to_sym + when '--format' + unless %w(abnf abnfh ebnf html isoebnf isoebnfh rb sxp).include?(arg) + STDERR.puts("unrecognized output format #{arg}") + usage + end + options[:output_format] = arg.to_sym when '--ll1' then (options[:ll1] ||= []) << arg.to_sym when '--mod-name' then options[:mod_name] = arg when '--output' then out = File.open(arg, "w") @@ -81,14 +91,16 @@ if options[:ll1] end res = case options[:output_format] -when :abnf then ebnf.to_s(:abnf) -when :abnfh then ebnf.to_html(:abnf) -when :ebnf then ebnf.to_s -when :html then ebnf.to_html -when :sxp then ebnf.to_sxp -when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace]) -when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options) -else ebnf.ast.inspect +when :abnf then ebnf.to_s(format: :abnf) +when :abnfh then ebnf.to_html(format: :abnf) +when :ebnf then ebnf.to_s +when :html then ebnf.to_html +when :isoebnf then ebnf.to_s(format: :isoebnf) +when :isoebnfh then ebnf.to_html(format: :isoebnf) +when :sxp then ebnf.to_sxp +when :ttl then ebnf.to_ttl(options[:prefix], options[:namespace]) +when :rb then ebnf.to_ruby(out, grammarFile: ARGV[0], **options) +else ebnf.ast.inspect 
end out.puts res \ No newline at end of file diff --git a/etc/sparql.ebnf b/etc/sparql.ebnf index 2177399..1b23a15 100644 --- a/etc/sparql.ebnf +++ b/etc/sparql.ebnf @@ -243,7 +243,7 @@ @terminals - [139] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' + [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>' [140] PNAME_NS ::= PN_PREFIX? ':' [141] PNAME_LN ::= PNAME_NS PN_LOCAL [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? diff --git a/etc/sparql.sxp b/etc/sparql.sxp index 3ca6a41..ff3a130 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -282,7 +282,7 @@ (rule iri "136" (alt IRIREF PrefixedName)) (rule PrefixedName "137" (alt PNAME_LN PNAME_NS)) (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON)) - (terminal IRIREF "139" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'"))) + (terminal IRIREF "139" (seq "<" (star (range "^<>\"{}|^`]-[#x00-#x20")) ">")) (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "142" diff --git a/etc/turtle.ebnf b/etc/turtle.ebnf index 9b9c4a1..da86a68 100644 --- a/etc/turtle.ebnf +++ b/etc/turtle.ebnf @@ -26,7 +26,7 @@ @terminals -[18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' +[18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>' [139s] PNAME_NS ::= PN_PREFIX? ":" [140s] PNAME_LN ::= PNAME_NS PN_LOCAL [141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? 
diff --git a/etc/turtle.sxp b/etc/turtle.sxp index b80b4cf..8fe9099 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -22,7 +22,7 @@ (rule String "17" (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE )) - (terminal IRIREF "18" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'"))) + (terminal IRIREF "18" (seq "<" (star (alt (range "^<>\"{}|^`]-[#x00-#x20") UCHAR)) ">")) (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9")))) (terminal DECIMAL "20" (seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9"))))) diff --git a/lib/ebnf.rb b/lib/ebnf.rb index 7c32687..4c54337 100755 --- a/lib/ebnf.rb +++ b/lib/ebnf.rb @@ -2,6 +2,7 @@ module EBNF autoload :ABNF, "ebnf/abnf" autoload :Base, "ebnf/base" autoload :BNF, "ebnf/bnf" + autoload :ISOEBNF, "ebnf/isoebnf" autoload :LL1, "ebnf/ll1" autoload :Parser, "ebnf/parser" autoload :PEG, "ebnf/peg" diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index f4b5d54..0977f3a 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -66,22 +66,6 @@ # [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html # [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3 # -# Open Issues and Future Work -# --------------------------- -# -# The yacker output also has the terminals compiled to elaborate regular -# expressions. The best strategy for dealing with lexical tokens is not -# yet clear. Many tokens in SPARQL are case insensitive; this is not yet -# captured formally. -# -# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...) -# is not yet published; it should be aligned with [swap/grammar/bnf][] -# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar -# in the SPARQL and XML specificiations). -# -# It would be interesting to corroborate the claim in the SPARQL spec -# that the grammar is LL(1) with a mechanical proof based on N3 rules. 
-# # [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf # [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3 # @@ -118,7 +102,7 @@ class Base # # @param [#read, #to_s] input # @param [Symbol] format (:ebnf) - # Format of input, one of :abnf, :ebnf, or :sxp + # Format of input, one of :abnf, :ebnf, :isoebnf, :isoebnf, or :sxp # @param [Hash{Symbol => Object}] options # @option options [Boolean, Array] :debug # Output debug information to an array or $stdout. @@ -159,6 +143,9 @@ def initialize(input, format: :ebnf, **options) when :abnf abnf = ABNF.new(input, **options) @ast = abnf.ast + when :isoebnf + iso = ISOEBNF.new(input, **options) + @ast = iso.ast else raise "unknown input format #{format.inspect}" end @@ -213,7 +200,7 @@ def to_sxp ## # Output formatted EBNF # - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [String] def to_s(format: :ebnf) Writer.string(*ast, format: format) @@ -222,7 +209,7 @@ def to_s(format: :ebnf) ## # Output formatted EBNF as HTML # - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [String] def to_html(format: :ebnf) Writer.html(*ast, format: format) diff --git a/lib/ebnf/isoebnf.rb b/lib/ebnf/isoebnf.rb new file mode 100644 index 0000000..98661e2 --- /dev/null +++ b/lib/ebnf/isoebnf.rb @@ -0,0 +1,226 @@ +require_relative 'isoebnf/meta' + +# ABNF parser +# Parses ABNF into an array of {EBNF::Rule}. +module EBNF + class ISOEBNF + include EBNF::PEG::Parser + + # The base for terminal-character, which omits "'", '"', and '?'. + # Could be more optimized, and one might quible + # with the overly-strictly defined character set, + # but it is correct. + TERMINAL_CHARACTER_BASE = %r{ + [a-zA-Z0-9] | # letter | decimal digit + , | # concatenate symbol + = | # defining symbol + [\|\/!] 
| # definition separator symbol + \*\) | # end comment symbol + \) | # end group symbol + \] | # end option symbol + \} | # end repeat symbol + \- | # except symbol + #\' | # first quote symbol + \* | # repetition symbol + #\" | # second quote symbol + #\? | # special sequence symbol + \(\* | # start comment symbol + \( | # start group symbol + \[ | # start option symbol + \{ | # start repeat symbol + [;\.] | # terminator symbol + [:+_%@&$<>^\x20\x23\\`~] # other character + }x + + TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]} + FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]} + SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]} + SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]} + + # Abstract syntax tree from parse + # + # @return [Array] + attr_reader :ast + + # `[14] integer ::= decimal_digit+` + terminal(:integer, /\d+/) do |value, prod| + value.to_i + end + + # `[15] meta_identifier ::= letter meta_identifier_character*` + terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value| + value.to_sym + end + + # `[17] terminal_string ::= ("'" first_terminal_character+ "'")` + # ` | ('"' second_terminal_character+ '"')` + terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value| + value[1..-2] + end + + # `[20] special_sequence ::= '?' 
special_sequence_character* '?'` + terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/) + + # `[22] terminal_character ::= [a-zA-Z0-9]` + # ` | [,=;*}#x2d?([{;]` + # ` | '*)'` + # ` | '(*'` + # ` | ']'` + # ` | other_character` + terminal(:terminal_character, TERMINAL_CHARACTER) + + # `[25] empty ::= ''` + terminal(:empty, //) + + # `[26] definition_separator_symbol ::= '|' | '/' | '!'` + terminal(:definition_separator_symbol, /[\|\/!]/) + + # `[27] terminator_symbol ::= ';' | '.'` + terminal(:terminator_symbol, /[;\.]/) + + # `[28] start_option_symbol ::= '[' | '(/'` + terminal(:start_option_symbol, /\[|\(\//) + + # `[29] end_option_symbol ::= ']' | '/)'` + terminal(:end_option_symbol, /[\]\/]/) + + # `[30] start_repeat_symbol ::= '{' | '(:'` + terminal(:start_repeat_symbol, /{|\(:/) + + # `[31] end_repeat_symbol ::= '}' | ':)'` + terminal(:end_repeat_symbol, /}|:\)/) + + # ## Non-terminal productions + + # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol` + production(:syntax_rule, clear_packrat: true) do |value, data, callback| + # value contains an expression. + # Invoke callback + sym = value[0][:meta_identifier] + definitions_list = value[2][:definitions_list] + callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list)) + nil + end + + # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes. 
+ # + # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*` + start_production(:definitions_list, as_hash: true) + production(:definitions_list) do |value| + if value[:_definitions_list_1].length > 0 + [:alt, value[:single_definition]] + value[:_definitions_list_1] + else + value[:single_definition] + end + end + production(:_definitions_list_1) do |value| + Array(value.first) + end + start_production(:_definitions_list_2, as_hash: true) + production(:_definitions_list_2) do |value| + if Array(value[:definitions_list]).first == :alt + value[:definitions_list][1..-1] + else + [value[:definitions_list]] + end + end + + # `[4] single_definition ::= term (',' term)*` + start_production(:single_definition, as_hash: true) + production(:single_definition) do |value| + if value[:_single_definition_1].length > 0 + [:seq, value[:term]] + value[:_single_definition_1] + else + value[:term] + end + end + production(:_single_definition_1) do |value| + value.map {|a1| a1.last[:term]}.compact # Get rid of '|' + end + + # `[5] term ::= factor ('-' exception)?` + start_production(:term, as_hash: true) + production(:term) do |value| + if value[:_diff_1] + [:diff, value[:postfix], value[:_term_1]] + else + value[:factor] + end + end + production(:_term_1) do |value| + value.last[:exception] if value + end + + # `[6] exception ::= factor` + start_production(:exception, as_hash: true) + production(:exception) do |value| + value[:factor] + end + + # `[7] factor ::= (integer '*')? 
primary` + start_production(:factor, as_hash: true) + production(:factor) do |value| + if value[:_factor_1] + [:rept, value[:_factor_1], value[:_factor_1], value[:primary]] + else + value[:primary] + end + end + production(:_factor_2) do |value| + value.first[:integer] + end + + # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol` + production(:optional_sequence) do |value| + [:opt, value[1][:definitions_list]] + end + + # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol` + production(:repeated_sequence) do |value| + [:star, value[1][:definitions_list]] + end + + # `[11] grouped_sequence ::= '(' definitions_list ')'` + production(:grouped_sequence) do |value| + [:seq, value[1][:definitions_list]] + end + + # ## Parser invocation. + # On start, yield ourselves if a block is given, otherwise, return this parser instance + # + # @param [#read, #to_s] input + # @param [Hash{Symbol => Object}] options + # @option options [Boolean] :level + # Trace level. 0(debug), 1(info), 2(warn), 3(error). + # @return [EBNFParser] + def initialize(input, **options, &block) + # If the `level` option is set, instantiate a logger for collecting trace information. + if options.has_key?(:level) + options[:logger] = Logger.new(STDERR) + options[:logger].level = options[:level] + options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"} + end + + # Read input, if necessary, which will be used in a Scanner. + @input = input.respond_to?(:read) ? input.read : input.to_s + + parsing_terminals = false + @ast = [] + parse(@input, + :syntax, + ISOEBNFMeta::RULES, + whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, + **options + ) do |context, *data| + rule = case context + when :rule + # A rule which has already been turned into a `Rule` object. 
+ rule = data.first + rule.kind = :terminal if parsing_terminals + rule + end + @ast << rule if rule + end + end + end +end diff --git a/lib/ebnf/isoebnf/meta.rb b/lib/ebnf/isoebnf/meta.rb new file mode 100644 index 0000000..3c943c6 --- /dev/null +++ b/lib/ebnf/isoebnf/meta.rb @@ -0,0 +1,73 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from iso-ebnf.ebnf +module ISOEBNFMeta + RULES = [ + EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, 
:empty]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :terminal_string, :special_sequence, :character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:empty, nil, [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], 
kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_option_symbol, nil, [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_option_symbol, nil, [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + ] +end + diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index b4d0d20..c546079 100644 --- 
a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -12,7 +12,7 @@ class Writer # Format rules to a String # # @param [Array] rules - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [Object] def self.string(*rules, format: :ebnf) require 'stringio' unless defined?(StringIO) @@ -25,7 +25,7 @@ def self.string(*rules, format: :ebnf) # Format rules to $stdout # # @param [Array] rules - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [Object] def self.print(*rules, format: :ebnf) write($stdout, *rules, format: format) @@ -36,7 +36,7 @@ def self.print(*rules, format: :ebnf) # # @param [Object] out # @param [Array] rules - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [Object] def self.write(out, *rules, format: :ebnf) Writer.new(rules, out: out, format: format) @@ -46,7 +46,7 @@ def self.write(out, *rules, format: :ebnf) # Write formatted rules to an IO like object as HTML # # @param [Array] rules - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [Object] def self.html(*rules, format: :ebnf) require 'stringio' unless defined?(StringIO) @@ -59,7 +59,7 @@ def self.html(*rules, format: :ebnf) # @param [Array] rules # @param [Hash{Symbol => Object}] options # @param [#write] out ($stdout) - # @param [:abnf, :ebnf] format (:ebnf) + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @option options [Symbol] format # @option options [Boolean] html (false) def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) @@ -67,6 +67,7 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) return if rules.empty? 
# Determine max LHS length + format_meth = "format_#{format}".to_sym max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length lhs_length = max_sym + 1 @@ -82,14 +83,8 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) begin require 'haml' hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules, format: format) do |rule| - case format - when :abnf - formatted_expr = format_abnf(rule.expr) - formatted_expr.length > rhs_length ? format_abnf(rule.expr, sep: "\n") : formatted_expr - when :ebnf - formatted_expr = format_ebnf(rule.expr) - formatted_expr.length > rhs_length ? format_ebnf(rule.expr, sep: "\n") : formatted_expr - end + formatted_expr = self.send(format_meth, rule.expr) + formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr end out.write hout return @@ -105,22 +100,12 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) else lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym} end - if format == :abnf - formatted_expr = format_abnf(rule.expr) - if formatted_expr.length > rhs_length - # Space out past "= " - buffer << format_abnf(rule.expr, sep: ("\n" + " " * (lhs_length + 2))) - else - # Space out past "::= " - buffer << formatted_expr - end - elsif format == :ebnf - formatted_expr = format_ebnf(rule.expr) - if formatted_expr.length > rhs_length - buffer << format_ebnf(rule.expr, sep: ("\n" + " " * (lhs_length + 4))) - else - buffer << formatted_expr - end + formatted_expr = self.send(format_meth, rule.expr) + if formatted_expr.length > rhs_length + # Space out past "= " + buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (format == :ebnf ? 4 : 2)))) + else + buffer << formatted_expr end out.puts(buffer) end @@ -165,7 +150,6 @@ def format_ebnf(expr, sep: nil, embedded: false) res = expr[1..-1].map {|e| format_ebnf(e, embedded: true)}.join(this_sep) embedded ? 
(lparen + res + rparen) : res when :star, :plus, :opt - raise "Expected star expression to have a single operand" unless expr.length == 2 char = parts[expr.first.to_sym] r = format_ebnf(expr[1], embedded: true) "#{r}#{char}" @@ -285,6 +269,9 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1] seq.unshift(:seq) return format_abnf(seq, sep: nil, embedded: false) + elsif expr.match?(/[\x00-\x1F\u{7F}-\u{10FFFF}]/) + # Express using %d notation + return format_abnf_range(expr) else return (@options[:html] ? %("#{'%s' if sensitive}#{expr}") : %(#{'%s' if sensitive}"#{expr}")) end @@ -349,17 +336,21 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) # Format a single-character string, prefering hex for non-main ASCII def format_abnf_char(c) - (@options[:html] ? %(#{escape_abnf_hex(c)}) : escape_abnf_hex(c)) + if /[\x20-\x21\x23-\x7E]/.match?(c) + return c.inspect + else + (@options[:html] ? %(#{escape_abnf_hex(c)}) : escape_abnf_hex(c)) + end end # Format a range + # FIXME: O_RANGE def format_abnf_range(string) - #require 'byebug'; byebug if string.include?('-') # Might include multiple ranges # #x01-#x03#x05-#x06 # a-bc-d - dash = (@options[:html] ? "- " : "-") + dash = (@options[:html] ? "- " : "-") # Split into separate range segments if string.start_with?('#x') ranges = [] @@ -367,7 +358,7 @@ def format_abnf_range(string) while !scanner.eos? ranges << scanner.scan(/#x\h+-#x\h+/) end - ranges.map {|range|"%x" + range.gsub('#x', '')}.join(" / ") + ranges.map {|range|"%x" + range.gsub('#x', '').sub('-', dash)}.join(" / ") else '%d' + string.gsub(/[^-]/) {|c| c.ord} end @@ -389,6 +380,151 @@ def escape_abnf_hex(u) "%x" + (fmt % u.ord) end + ## + # ISO EBNF Formatters + ## + + # Format the expression part of a rule + def format_isoebnf(expr, sep: nil, embedded: false) + return (@options[:html] ? 
%(#{expr}) : expr.to_s) if expr.is_a?(Symbol) + if expr.is_a?(String) + expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/ + expr.chars.each do |c| + raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + end + if expr =~ /"/ + return (@options[:html] ? %('#{expr}') : %('#{expr}')) + else + return (@options[:html] ? %("#{expr}") : %("#{expr}")) + end + end + parts = { + alt: (@options[:html] ? "| " : "| "), + diff: (@options[:html] ? "- " : "- "), + } + lparen = (@options[:html] ? "( " : "(") + rparen = (@options[:html] ? ") " : ")") + + case expr.first + when :istr + # Looses fidelity, but, oh well ... + format_isoebnf(expr.last, embedded: true) + when :alt, :diff + this_sep = (sep ? sep : " ") + parts[expr.first.to_sym] + res = expr[1..-1].map {|e| format_isoebnf(e, embedded: true)}.join(this_sep) + embedded ? (lparen + res + rparen) : res + when :opt + r = format_isoebnf(expr[1], embedded: true) + "[#{r}]" + when :star + r = format_isoebnf(expr[1], embedded: true) + "{#{r}}" + when :plus + r = format_isoebnf(expr[1], embedded: true) + "#{r}, {#{r}}" + when :hex + format_isoebnf(expr[1], embedded: true) + when :range + format_isoebnf_range(expr.last) + when :seq + this_sep = "," + (sep ? sep : " ") + res = expr[1..-1].map do |e| + format_isoebnf(e, embedded: true) + end.join(this_sep) + embedded ? 
(lparen + res + rparen) : res + when :rept + # Expand repetition + min, max, value = expr[1..-1] + if min == 0 && max == 1 + format_isoebnf([:opt, value], sep: sep, embedded: embedded) + elsif min == 0 && max == '*' + format_isoebnf([:star, value], sep: sep, embedded: embedded) + elsif min == 1 && max == '*' + format_isoebnf([:plus, value], sep: sep, embedded: embedded) + elsif min > 0 && min == max + "#{min}*" + format_isoebnf(value, sep: sep, embedded: embedded) + else + val2 = [:seq] + while min > 0 + val2 << value + min -= 1 + max -= 1 unless max == '*' + end + if max == '*' + val2 << [:star, value] + else + opt = nil + while max > 0 + opt = [:opt, opt ? [:seq, value, opt] : value] + max -= 1 + end + val2 << opt if opt + end + format_isoebnf(val2, sep: sep, embedded: embedded) + end + else + raise "Unknown operator: #{expr.first}" + end + end + + # Format a range + # Range is formatted as a aliteration of characters + # FIXME: O_RANGE + def format_isoebnf_range(string) + chars = [] + scanner = StringScanner.new(string) + if string.include?('-') + ranges = [] + # Might include multiple ranges + # #x01-#x03#x05-#x06 + # a-bc-d + # Split into separate range segments + if string.start_with?('#x') + while !scanner.eos? + ranges << scanner.scan(/#x\h+-#x\h+/) + end + ranges.each do |range| + first, last = range.split('-').map {|h| h[2..-1].hex.ord} + while first <= last + c = first.chr(Encoding::UTF_8) + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + first += 1 + end + end + else + while !scanner.eos? 
+ r = scanner.scan(/.-./) + require 'byebug'; byebug unless r + ranges << r + end + ranges.each do |range| + first, last = range.split('-').map {|c| c.ord} + while first <= last + c = first.chr(Encoding::UTF_8) + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + first += 1 + end + end + end + else + while !scanner.eos? + c = if hex = scanner.scan(/#x\h+/) + hex[2..-1].hex.ord.chr(Encoding::UTF_8) + else + scanner.scan(/./) + end + end + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + end + end + HAML_DESC = %q( %table.grammar %tbody#grammar-productions diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 924a2b6..6969e0b 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -15,6 +15,8 @@ %{(rule postfix "9" (seq primary (opt (range "?*+"))))}, %{[18] STRING2 ::= "'" (CHAR - "'")* "'"} => %{(terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'"))}, + %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => + %{(terminal IRIREF "18" (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))} }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) @@ -31,6 +33,8 @@ %{(rule postfix (seq primary (opt (range "?*+"))))}, %{STRING2 ::= "'" (CHAR - "'")* "'"} => %{(terminal STRING2 (seq "'" (star (diff CHAR "'")) "'"))}, + %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => + %{(terminal IRIREF (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))} }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) @@ -59,7 +63,9 @@ %{a) b c} => %{(a " b c")}, %(BaseDecl? 
PrefixDecl*) => %{((seq (opt BaseDecl) (star PrefixDecl)) "")}, %(NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]) => - %{((alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040")) "")} + %{((alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040")) "")}, + %('<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => + %{((seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">") "")} }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do expect(ebnf(:expression, input).to_sxp).to produce(expected, @debug) From 8be170e18148ab0b3cb151f03201d5b327f85e73 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 4 Jul 2020 16:45:31 -0700 Subject: [PATCH 22/50] Files for abnf and isoebnf in /etc. --- etc/abnf-core.ebnf | 52 +++++++++++++++++ etc/abnf.abnf | 121 ++++++++++++++++++++++++++++++++++++++ etc/abnf.ebnf | 124 +++++++++++++++++++++++++++++++++++++++ etc/abnf.sxp | 44 ++++++++++++++ etc/iso-ebnf.ebnf | 136 +++++++++++++++++++++++++++++++++++++++++++ etc/iso-ebnf.isoebnf | 123 ++++++++++++++++++++++++++++++++++++++ etc/iso-ebnf.sxp | 62 ++++++++++++++++++++ 7 files changed, 662 insertions(+) create mode 100644 etc/abnf-core.ebnf create mode 100644 etc/abnf.abnf create mode 100644 etc/abnf.ebnf create mode 100644 etc/abnf.sxp create mode 100644 etc/iso-ebnf.ebnf create mode 100644 etc/iso-ebnf.isoebnf create mode 100644 etc/iso-ebnf.sxp diff --git a/etc/abnf-core.ebnf b/etc/abnf-core.ebnf new file mode 100644 index 0000000..5856444 --- /dev/null +++ b/etc/abnf-core.ebnf @@ -0,0 +1,52 @@ +# Core terminals available in uses of ABNF +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z + +BIT ::= '0' | '1' + +CHAR ::= [#x01-#x7F] + # any 7-bit US-ASCII character, + # excluding NUL +CR ::= #x0D + # carriage return + +CRLF ::= CR? 
LF + # Internet standard newline + +CTL ::= [#x00-#x1F] | #x7F + # controls + +DIGIT ::= [#x30-#x39] + # 0-9 + +DQUOTE ::= #x22 + # " (Double Quote) + +HEXDIG ::= DIGIT | [A-F] + +HTAB ::= #x09 + # horizontal tab + +LF ::= #x0A + # linefeed + +LWSP ::= (WSP | CRLF WSP)* + # Use of this linear-white-space rule + # permits lines containing only white + # space that are no longer legal in + # mail headers and have caused + # interoperability problems in other + # contexts. + # Do not use when defining mail + # headers and use with caution in + # other contexts. + +OCTET ::= [#x00-#xFF] + # 8 bits of data + +SP ::= #x20 + +VCHAR ::= [#x21-#x7E] + # visible (printing) characters + +WSP ::= SP | HTAB + # white space diff --git a/etc/abnf.abnf b/etc/abnf.abnf new file mode 100644 index 0000000..9acd3fb --- /dev/null +++ b/etc/abnf.abnf @@ -0,0 +1,121 @@ +rulelist = 1*( rule / (*c-wsp c-nl) ) + +rule = rulename defined-as elements c-nl + ; continues if next line starts + ; with white space + +rulename = ALPHA *(ALPHA / DIGIT / "-") + +defined-as = *c-wsp ("=" / "=/") *c-wsp + ; basic rules definition and + ; incremental alternatives + +elements = alternation *c-wsp + +c-wsp = WSP / (c-nl WSP) + +c-nl = comment / CRLF + ; comment or newline + +comment = ";" *(WSP / VCHAR) CRLF + +alternation = concatenation + *(*c-wsp "/" *c-wsp concatenation) + +concatenation = repetition *(1*c-wsp repetition) + +repetition = [repeat] element + +repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT + +element = rulename / group / option / + char-val / num-val / prose-val + +group = "(" *c-wsp alternation *c-wsp ")" + +option = "[" *c-wsp alternation *c-wsp "]" + +char-val = case-insensitive-string / + case-sensitive-string + +case-insensitive-string = + [ "%i" ] quoted-string + +case-sensitive-string = + "%s" quoted-string + +quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE + ; quoted string of SP and VCHAR + ; without DQUOTE + +num-val = "%" (bin-val / dec-val / hex-val) + +bin-val = "b" 1*BIT + [ 
1*("." 1*BIT) / ("-" 1*BIT) ] + ; series of concatenated bit values + ; or single ONEOF range + +dec-val = "d" 1*DIGIT + [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] + +hex-val = "x" 1*HEXDIG + [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] + +prose-val = "<" *(%x20-3D / %x3F-7E) ">" + ; bracketed string of SP and VCHAR + ; without angles + ; prose description, to be used as + ; last resort + +ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + +BIT = "0" / "1" + +CHAR = %x01-7F + ; any 7-bit US-ASCII character, + ; excluding NUL +CR = %x0D + ; carriage return + +CRLF = [CR] LF + ; Internet standard newline + ; Extended to allow only newline + +CTL = %x00-1F / %x7F + ; controls + +DIGIT = %x30-39 + ; 0-9 + +DQUOTE = %x22 + ; " (Double Quote) + +HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + +HTAB = %x09 + ; horizontal tab + +LF = %x0A + ; linefeed + +LWSP = *(WSP / CRLF WSP) + ; Use of this linear-white-space rule + ; permits lines containing only white + ; space that are no longer legal in + ; mail headers and have caused + ; interoperability problems in other + ; contexts. + ; Do not use when defining mail + ; headers and use with caution in + ; other contexts. + +OCTET = %x00-FF + ; 8 bits of data + +SP = %x20 + +VCHAR = %x21-7E + ; visible (printing) characters + +WSP = SP / HTAB + ; white space diff --git a/etc/abnf.ebnf b/etc/abnf.ebnf new file mode 100644 index 0000000..6e8d708 --- /dev/null +++ b/etc/abnf.ebnf @@ -0,0 +1,124 @@ +rulelist ::= ( rule | (c_wsp* c_nl) )+ + +rule ::= rulename defined_as elements c_nl + # continues if next line starts + # with white space + +elements ::= alternation c_wsp* + +alternation ::= concatenation + (c_wsp* "/" c_wsp* concatenation)* + +concatenation::= repetition (c_wsp+ repetition)* + +repetition ::= repeat? 
element + +repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+ + +element ::= rulename | group | option | + char_val | num_val | prose_val + +group ::= "(" c_wsp* alternation c_wsp* ")" + +option ::= "[" c_wsp* alternation c_wsp* "]" + +char_val ::= case_insensitive_string | + case_sensitive_string + +case_insensitive_string ::= + "%i"? quoted_string + +case_sensitive_string ::= + "%s" quoted_string + +num_val ::= "%" (bin_val | dec_val | hex_val) + +@terminals + +# Terminals used in ABNF, itself +rulename ::= ALPHA (ALPHA | DIGIT | "-")* + +defined_as ::= c_wsp* ("=" | "=/") c_wsp* + # basic rules definition and + # incremental alternatives + +c_wsp ::= WSP | (c_nl WSP) + +c_nl ::= COMMENT | CRLF + # comment or newline + +comment ::= ";" (WSP | VCHAR)* CRLF + +quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE + # quoted string of SP and VCHAR + # without DQUOTE + +bin_val ::= "b" BIT+ + (("." BIT+)+ | ("-" BIT+))? + # series of concatenated bit values + # or single ONEOF range + +dec_val ::= "d" DIGIT+ + (("." DIGIT+)+ | ("-" DIGIT+))? + +hex_val ::= "x" HEXDIG+ + (("." HEXDIG+)+ | ("-" HEXDIG+))? + +prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" + # bracketed string of SP and VCHAR + # without angles + # prose description, to be used as + # last resort + +# Core terminals available in uses of ABNF +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z + +BIT ::= '0' | '1' + +CHAR ::= [#x01-#x7F] + # any 7-bit US-ASCII character, + # excluding NUL +CR ::= #x0D + # carriage return + +CRLF ::= CR? 
LF + # Internet standard newline + +CTL ::= [#x00-#x1F] | #x7F + # controls + +DIGIT ::= [#x30-#x39] + # 0-9 + +DQUOTE ::= #x22 + # " (Double Quote) + +HEXDIG ::= DIGIT | "A" | "B" | "C" | "D" | "E" | "F" + +HTAB ::= #x09 + # horizontal tab + +LF ::= #x0A + # linefeed + +LWSP ::= (WSP | CRLF WSP)* + # Use of this linear-white-space rule + # permits lines containing only white + # space that are no longer legal in + # mail headers and have caused + # interoperability problems in other + # contexts. + # Do not use when defining mail + # headers and use with caution in + # other contexts. + +OCTET ::= [#x00-#xFF] + # 8 bits of data + +SP ::= #x20 + +VCHAR ::= [#x21-#x7E] + # visible (printing) characters + +WSP ::= SP | HTAB + # white space diff --git a/etc/abnf.sxp b/etc/abnf.sxp new file mode 100644 index 0000000..5877eb4 --- /dev/null +++ b/etc/abnf.sxp @@ -0,0 +1,44 @@ + ( + (terminal WSP (alt SP HTAB)) + (rule rule (seq rulename defined_as elements c_nl)) + (rule elements (seq alternation (star c_wsp))) + (rule alternation + (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) + (rule repetition (seq (opt repeat) element)) + (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) + (rule element (alt rulename group option char_val num_val prose_val)) + (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) + (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (rule case_insensitive_string (seq (opt "%i") quoted_string)) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) + (terminal c_wsp (alt WSP (seq c_nl WSP))) + (terminal c_nl (alt COMMENT CRLF)) + (terminal 
comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) + (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (terminal dec_val + (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) + (terminal hex_val + (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG)))))) + (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (terminal BIT (alt "0" "1")) + (terminal CHAR (range "#x01-#x7F")) + (terminal CR (hex "#x0D")) + (terminal CRLF (seq (opt CR) LF)) + (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) + (terminal DIGIT (range "#x30-#x39")) + (terminal DQUOTE (hex "#x22")) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) + (terminal HTAB (hex "#x09")) + (terminal LF (hex "#x0A")) + (terminal LWSP (star (alt WSP (seq CRLF WSP)))) + (terminal OCTET (range "#x00-#xFF")) + (terminal SP (hex "#x20")) + (terminal VCHAR (range "#x21-#x7E")) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))) diff --git a/etc/iso-ebnf.ebnf b/etc/iso-ebnf.ebnf new file mode 100644 index 0000000..05d6481 --- /dev/null +++ b/etc/iso-ebnf.ebnf @@ -0,0 +1,136 @@ +# W3C EBNF for ISO/IEC 14977 : 1996 EBNF +# (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) + +# Extended to allow no syntax_rule to be valid. +syntax ::= syntax_rule* + +syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol + (* A defines the sequences of + symbols represented by a *) + +definitions_list ::= single_definition (definition_separator_symbol definitions_list)* + (* | separates alternative *) + +single_definition ::= term (',' term)* + (* , separates successive *) + +term ::= factor ('-' exception)? 
+ (* A represents any sequence of symbols that is defined by the but + not defined by the *) + +exception ::= factor + (* A may be used as an + if it could be replaced by a + containingno *) + +factor ::= (integer '*')? primary + (* The specifies the number of repetitions of the *) + +primary ::= optional_sequence + | repeated_sequence + | special_sequence + | grouped_sequence + | meta_identifier + | terminal_string + | empty + +optional_sequence ::= start_option_symbol definitions_list end_option_symbol + (* The brackets [ and ] enclose symbols which are optional *) + +repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol + (* The brackets { and } enclose symbols + which may be repeated any number of times *) + +grouped_sequence ::= '(' definitions_list ')' + (* The brackets ( and ) allow any to be a *) + +# Note, the following are nominally terminal rules, +# although ISO EBNF does not really distinguish between non-terminal and terminal rules. + +@terminals + +terminal_string ::= ("'" first_terminal_character+ "'") + | ('"' second_terminal_character+ '"') + (* A represents the + between the quote symbols '_' or "_" *) + +meta_identifier ::= letter meta_identifier_character* + (* A is the name of a syntactic element of the language being defined *) + +integer ::= decimal_digit+ + +special_sequence ::= '?' special_sequence_character* '?' + (* The meaning of a is not defined in the standard metalanguage. *) + +comment ::= start_comment_symbol comment_symbol* end_comment_symbol + (* A comment is allowed anywhere outside a + , , + or *) + +comment_symbol ::= comment | terminal_string | special_sequence | character + +letter ::= [a-zA-Z] +decimal_digit ::= [0-9] + +# Extended to allow '_' +meta_identifier_character ::= letter | decimal_digit | '_' + +first_terminal_character ::= terminal_character - "'" + +second_terminal_character ::= terminal_character - '"' + +special_sequence_character ::= terminal_character - '?' 
+ +terminal_character ::= letter + | decimal_digit + | concatenate_symbol + | defining_symbol + | definition_separator_symbol + | end_comment_symbol + | end_group_symbol + | end_option_symbol + | end_repeat_symbol + | except_symbol + | first_quote_symbol + | repetition_symbol + | second_quote_symbol + | special_sequence_symbol + | start_comment_symbol + | start_group_symbol + | start_option_symbol + | start_repeat_symbol + | terminator_symbol + | other_character + +other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' + +gap_separator ::= [#x9#xa#xb#xc#xd#x20] + +@pass gap_separator+ | comment + +empty ::= '' + +# Simple terminals that are often extended +defining_symbol ::= '=' | ':' +definition_separator_symbol ::= '|' | '/' | '!' +terminator_symbol ::= ';' | '.' +start_option_symbol ::= '[' | '(/' +end_option_symbol ::= ']' | '/)' +start_repeat_symbol ::= '{' | '(:' +end_repeat_symbol ::= '}' | ':)' + +# Symbols described, but not actually used. + +gap_free_symbol ::= (terminal_character - ['"]) + | terminal_string + +repetition_symbol ::= '*' +except_symbol ::= '-' +concatenate_symbol ::= ',' +first_quote_symbol ::= "'" +second_quote_symbol ::= '"' +start_comment_symbol ::= '(*' +end_comment_symbol ::= '*)' +start_group_symbol ::= '(' +end_group_symbol ::= ')' +special_sequence_symbol ::= '?' 
diff --git a/etc/iso-ebnf.isoebnf b/etc/iso-ebnf.isoebnf new file mode 100644 index 0000000..90084f1 --- /dev/null +++ b/etc/iso-ebnf.isoebnf @@ -0,0 +1,123 @@ +(* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *) +(* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *) + +syntax = syntax_rule, {syntax_rule} ; + +syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol + (* A defines the sequences of + symbols represented by a *); + +definitions_list = single_definition, {definition_separator_symbol, definitions_list} + (* | separates alternative *); + +single_definition = term, {',', term} + (* , separates successive *); + +term = factor, ['-', exception] + (* A represents any sequence of symbols that is defined by the but + not defined by the *); + +exception = factor + (* A may be used as an + if it could be replaced by a + containingno *); + +factor = [integer, '*'], primary + (* The specifies the number of repetitions of the *); + +primary = optional_sequence + | repeated_sequence + | special_sequence + | grouped_sequence + | meta_identifier + | terminal_string + | empty + ; + +optional_sequence = start_option_symbol, definitions_list, end_option_symbol + (* The brackets [ and ] enclose symbols which are optional *); + +repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol + (* The brackets { and } enclose symbols + which may be repeated any number of times *); + +grouped_sequence = '(', definitions_list, ')' + (* The brackets ( and ) allow any to be a *); + +terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") + | ('"', second_terminal_character, {second_terminal_character}, '"') + (* A represents the + between the quote symbols ’_’ or "_" *); + +meta_identifier = letter, {meta_identifier_character} + (* A is the name of a syntactic element of the language being defined *); + +integer = decimal_digit, {decimal_digit} ; + +special_sequence = '?', {special_sequence_character}, '?' 
+ (* The meaning of a is not defined in the standard metalanguage. *); + +comment = ’(*’, {comment_symbol}, ’*)’ + (* A comment is allowed anywhere outside a + , , + or *); + +comment_symbol = comment | terminal_string | special_sequence | character; + +letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" + | "c" | "d" | "e" | "f" | "g" | "h" | "i" + | "j" | "k" | "l" | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" | "u" | "v" | "w" + | "x" | "y" | "z" + ; + +decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +(* Extended to allow '_' *) +meta_identifier_character = letter | decimal_digit | '_' ; + +first_terminal_character = terminal_character - "'" ; + +second_terminal_character = terminal_character - '"' ; + +special_sequence_character = terminal_character - '?' ; + +terminal_character = letter + | decimal_digit + | concatenate_symbol + | defining_symbol + | definition_separator_symbol + | end_comment_symbol + | end_group_symbol + | end_option_symbol + | end_repeat_symbol + | except_symbol + | first_quote_symbol + | repetition_symbol + | second_quote_symbol + | special_sequence_symbol + | start_comment_symbol + | start_group_symbol + | start_option_symbol + | start_repeat_symbol + | terminator_symbol + | other_character + ; + +other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&' + | '#' | '$' | '<' | '>' | '\' | '^' | '`' + | '~' ; + +empty = ; + +(* Simple terminals that are often extended *) +defining_symbol = '=' | ':' ; +definition_separator_symbol = '|' | '/' | '!' ; +terminator_symbol = ';' | '.' 
; +start_option_symbol = '[' | '(/' ; +end_option_symbol = ']' | '/)' ; +start_repeat_symbol = '{' | '(:' ; +end_repeat_symbol = '}' | ':)' ; diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp new file mode 100644 index 0000000..05a13fc --- /dev/null +++ b/etc/iso-ebnf.sxp @@ -0,0 +1,62 @@ + ( + (terminal special_sequence_symbol (seq "?")) + (rule syntax_rule + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (rule definitions_list + (seq single_definition (star (seq definition_separator_symbol definitions_list)))) + (rule single_definition (seq term (star (seq "," term)))) + (rule term (seq factor (opt (seq "-" exception)))) + (rule exception (seq factor)) + (rule factor (seq (opt (seq integer "*")) primary)) + (rule primary + (alt optional_sequence repeated_sequence special_sequence grouped_sequence + meta_identifier terminal_string empty )) + (rule optional_sequence + (seq start_option_symbol definitions_list end_option_symbol)) + (rule repeated_sequence + (seq start_repeat_symbol definitions_list end_repeat_symbol)) + (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal terminal_string + (alt + (seq "'" (plus first_terminal_character) "'") + (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal integer (plus decimal_digit)) + (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) + (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (terminal first_terminal_character (diff terminal_character "'")) + (terminal second_terminal_character (diff terminal_character "\"")) + (terminal special_sequence_character (diff terminal_character "?")) + (terminal terminal_character + (alt letter decimal_digit concatenate_symbol defining_symbol + definition_separator_symbol end_comment_symbol end_group_symbol + end_option_symbol end_repeat_symbol except_symbol first_quote_symbol + repetition_symbol second_quote_symbol special_sequence_symbol + start_comment_symbol start_group_symbol start_option_symbol + start_repeat_symbol terminator_symbol other_character )) + (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (pass _pass (alt (plus gap_separator) comment)) + (terminal empty (seq ())) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + 
(rule syntax (star syntax_rule))) From 07cf933f85dc0d025aae39e03c8bda8fdec42b8b Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 4 Jul 2020 17:49:13 -0700 Subject: [PATCH 23/50] Update RANGE and O_RANGE to allow just a single range, not multiple ranges in the same expression. --- etc/abnf.ebnf | 6 +- etc/abnf.sxp | 7 ++- etc/ebnf.ebnf | 4 +- etc/ebnf.html | 4 +- etc/ebnf.ll1.sxp | 5 +- etc/ebnf.peg.rb | 14 ++--- etc/ebnf.peg.sxp | 18 +++--- etc/ebnf.sxp | 5 +- etc/iso-ebnf.ebnf | 2 +- etc/iso-ebnf.isoebnf | 4 +- etc/iso-ebnf.sxp | 2 +- examples/abnf/README.md | 56 +++++++++--------- examples/abnf/abnf.ebnf | 6 +- examples/abnf/abnf.peg.sxp | 15 +++-- examples/abnf/abnf.sxp | 9 +-- examples/abnf/doc/parser.html | 57 ++++++++++++------- examples/abnf/meta.rb | 15 +++-- examples/ebnf-ll1-parser/README.md | 9 ++- examples/ebnf-peg-parser/README.md | 9 ++- examples/ebnf-peg-parser/meta.rb | 18 +++--- examples/isoebnf/README.md | 63 ++++++++++++--------- examples/isoebnf/examples/iso-ebnf.isoebnf | 4 +- lib/ebnf/terminals.rb | 4 +- lib/ebnf/writer.rb | 66 ++++++---------------- 24 files changed, 201 insertions(+), 201 deletions(-) diff --git a/etc/abnf.ebnf b/etc/abnf.ebnf index 6e8d708..c6e91e8 100644 --- a/etc/abnf.ebnf +++ b/etc/abnf.ebnf @@ -49,7 +49,7 @@ c_nl ::= COMMENT | CRLF comment ::= ";" (WSP | VCHAR)* CRLF -quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE +quoted_string::= DQUOTE ([#x20-#x21] | [#x23-#x7E])* DQUOTE # quoted string of SP and VCHAR # without DQUOTE @@ -64,14 +64,14 @@ dec_val ::= "d" DIGIT+ hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))? 
-prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" +prose_val ::= "<" ([#x20-#x3D] | [#x3F-#x7E])* ">" # bracketed string of SP and VCHAR # without angles # prose description, to be used as # last resort # Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z +ALPHA ::= [#x41-#x5A] | [#x61-#x7A] # A-Z | a-z BIT ::= '0' | '1' diff --git a/etc/abnf.sxp b/etc/abnf.sxp index 5877eb4..ba8d642 100644 --- a/etc/abnf.sxp +++ b/etc/abnf.sxp @@ -19,14 +19,15 @@ (terminal c_wsp (alt WSP (seq c_nl WSP))) (terminal c_nl (alt COMMENT CRLF)) (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) + (terminal quoted_string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) (terminal dec_val (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) (terminal hex_val (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) - (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) + (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 0c9b54e..0d6af57 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -42,9 +42,9 @@ [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' + [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX)) ']' - [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' + [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX)) ']' # Strings are unescaped Unicode, excepting control characters and hash (#) [18] STRING1 ::= '"' (CHAR - '"')* '"' diff --git a/etc/ebnf.html b/etc/ebnf.html index f2b44da..e0ee042 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -133,7 +133,7 @@ RANGE ::= -"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" +"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX)) "]" @@ -141,7 +141,7 @@ O_RANGE ::= -"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" +"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX)) "]" diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 9395c05..0f71ae7 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -158,9 +158,8 @@ (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + 
(terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 7fc782a..915e74e 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -47,15 +47,13 @@ module Meta EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, 
:_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 8637a8e..1516e43 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -46,15 +46,13 @@ (terminal _O_ENUM_3 "15.3" (plus R_CHAR)) (terminal _O_ENUM_4 "15.4" (plus HEX)) (terminal RANGE "16" (seq "[" _RANGE_1 "]")) - (terminal _RANGE_1 "16.1" (plus _RANGE_2)) - (terminal _RANGE_2 "16.2" (alt _RANGE_3 _RANGE_4)) - (terminal _RANGE_3 "16.3" (seq R_CHAR "-" R_CHAR)) - (terminal _RANGE_4 "16.4" (seq HEX "-" HEX)) + (terminal _RANGE_1 "16.1" (alt _RANGE_2 _RANGE_3)) + (terminal _RANGE_2 "16.2" (seq R_CHAR "-" R_CHAR)) + (terminal _RANGE_3 "16.3" (seq HEX "-" HEX)) (terminal O_RANGE "17" (seq "[^" _O_RANGE_1 "]")) - (terminal _O_RANGE_1 "17.1" (plus _O_RANGE_2)) - (terminal _O_RANGE_2 "17.2" (alt _O_RANGE_3 _O_RANGE_4)) - (terminal _O_RANGE_3 "17.3" (seq R_CHAR "-" R_CHAR)) - (terminal _O_RANGE_4 "17.4" (seq HEX "-" HEX)) + (terminal _O_RANGE_1 "17.1" (alt _O_RANGE_2 _O_RANGE_3)) + (terminal _O_RANGE_2 "17.2" (seq R_CHAR "-" R_CHAR)) + (terminal _O_RANGE_3 "17.3" (seq HEX "-" HEX)) (terminal STRING1 "18" (seq "\"" _STRING1_1 "\"")) (terminal _STRING1_1 "18.1" (star _STRING1_2)) (terminal _STRING1_2 "18.2" (diff CHAR "\"")) @@ -69,8 +67,8 @@ (terminal R_CHAR "21" (diff CHAR "]")) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" (plus _PASS_1)) - (terminal _PASS_1 "23.1" (alt _PASS_2 _PASS_3 _PASS_4 _PASS_5)) (terminal _PASS_10 
"23.10" (star _PASS_11)) + (terminal _PASS_1 "23.1" (alt _PASS_2 _PASS_3 _PASS_4 _PASS_5)) (terminal _PASS_11 "23.11" (alt _PASS_12 _PASS_13)) (terminal _PASS_12 "23.12" (opt _PASS_14)) (terminal _PASS_13 "23.13" (range "^*")) @@ -80,8 +78,8 @@ (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) (terminal _PASS_18 "23.18" (opt _PASS_20)) (terminal _PASS_19 "23.19" (range "^*")) - (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) + (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_21 "23.21" (range "^)")) (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 7841756..87e6d04 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -16,9 +16,8 @@ (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/etc/iso-ebnf.ebnf b/etc/iso-ebnf.ebnf index 05d6481..5d37f91 100644 --- a/etc/iso-ebnf.ebnf +++ b/etc/iso-ebnf.ebnf @@ -69,7 +69,7 @@ comment ::= start_comment_symbol comment_symbol* end_comment comment_symbol ::= comment | terminal_string | special_sequence | character -letter ::= [a-zA-Z] +letter ::= [a-z] | [A-Z] decimal_digit ::= [0-9] # Extended to allow '_' diff --git a/etc/iso-ebnf.isoebnf b/etc/iso-ebnf.isoebnf index 
90084f1..8bcda08 100644 --- a/etc/iso-ebnf.isoebnf +++ b/etc/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols ’_’ or "_" *); + between the quote symbols '_' or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. *); -comment = ’(*’, {comment_symbol}, ’*)’ +comment = '(*', {comment_symbol}, '*)' (* A comment is allowed anywhere outside a , , or *); diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 05a13fc..3b89954 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -25,7 +25,7 @@ (terminal special_sequence (seq "?" (star special_sequence_character) "?")) (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) (terminal comment_symbol (alt comment terminal_string special_sequence character)) - (terminal letter (range "a-zA-Z")) + (terminal letter (alt (range "a-z") (range "A-Z"))) (terminal decimal_digit (range "0-9")) (terminal meta_identifier_character (alt letter decimal_digit "_")) (terminal first_terminal_character (diff terminal_character "'")) diff --git a/examples/abnf/README.md b/examples/abnf/README.md index f6d7a43..5faa0d4 100644 --- a/examples/abnf/README.md +++ b/examples/abnf/README.md @@ -15,49 +15,49 @@ Output rules and terminals as [S-Expression][S-Expression]: This generates a [S-Expression][] form of the grammar suitable for use by {EBNF}. 
( - (rule rulelist (plus (alt rule (seq (star c-wsp) c-nl)))) - (rule rule (seq rulename defined-as elements c-nl)) - (rule rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) - (rule defined-as (seq (star c-wsp) (alt "=" "=/") (star c-wsp))) - (rule elements (seq alternation (star c-wsp))) - (rule c-wsp (alt WSP (seq c-nl WSP))) - (rule c-nl (alt comment CRLF)) - (rule comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) + (rule rule (seq rulename defined_as elements c_nl)) + (rule elements (seq alternation (star c_wsp))) (rule alternation - (seq concatenation (star (seq (star c-wsp) "/" (star c-wsp) concatenation)))) - (rule concatenation (seq repetition (star (seq (plus c-wsp) repetition)))) + (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) (rule repetition (seq (opt repeat) element)) (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) - (rule element (alt rulename group option char-val num-val prose-val)) - (rule group (seq "(" (star c-wsp) alternation (star c-wsp) ")")) - (rule option (seq "[" (star c-wsp) alternation (star c-wsp) "]")) - (rule char-val (alt case-insensitive-string case-sensitive-string)) - (rule case-insensitive-string (seq (opt "%i") quoted-string)) - (rule case-sensitive-string (seq "%s" quoted-string)) - (rule quoted-string - (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7e"))) DQUOTE)) - (rule num-val (seq "%" (alt bin-val dec-val hex-val))) - (rule bin-val (seq "b" (plus BIT) (opt (alt (plus (seq "." 
(plus BIT))) (seq "-" (plus BIT)))))) - (rule dec-val + (rule element (alt rulename group option char_val num_val prose_val)) + (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) + (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (rule case_insensitive_string (seq (opt "%i") quoted_string)) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) + (terminal c_wsp (alt WSP (seq c_nl WSP))) + (terminal c_nl (alt COMMENT CRLF)) + (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (terminal quoted_string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) + (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (terminal dec_val (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) - (rule hex-val + (terminal hex_val (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (rule prose-val (seq "<" (star (alt (range "#x20-#x3d") (range "#x3f-#x7e"))) ">")) - (terminal ALPHA (alt (range "#x41-#x5a") (range "#x61-#x7a"))) + (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) + (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) (terminal BIT (alt "0" "1")) - (terminal CHAR (range "#x1-#x7f")) + (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) (terminal CRLF (seq (opt CR) LF)) - (terminal CTL (alt (range "#x0-#x1f") (hex "#x7F"))) + (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star (alt WSP (seq CRLF WSP)))) - (terminal OCTET (range "#x0-#xff")) + (terminal OCTET (range "#x00-#xFF")) (terminal SP (hex "#x20")) - (terminal VCHAR (range "#x21-#x7e")) + (terminal VCHAR (range "#x21-#x7E")) (terminal WSP (alt SP HTAB))) This can then be used as input to {EBNF.parse} to transform ABNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. diff --git a/examples/abnf/abnf.ebnf b/examples/abnf/abnf.ebnf index 6e8d708..c6e91e8 100644 --- a/examples/abnf/abnf.ebnf +++ b/examples/abnf/abnf.ebnf @@ -49,7 +49,7 @@ c_nl ::= COMMENT | CRLF comment ::= ";" (WSP | VCHAR)* CRLF -quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE +quoted_string::= DQUOTE ([#x20-#x21] | [#x23-#x7E])* DQUOTE # quoted string of SP and VCHAR # without DQUOTE @@ -64,14 +64,14 @@ dec_val ::= "d" DIGIT+ hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))? 
-prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" +prose_val ::= "<" ([#x20-#x3D] | [#x3F-#x7E])* ">" # bracketed string of SP and VCHAR # without angles # prose description, to be used as # last resort # Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z +ALPHA ::= [#x41-#x5A] | [#x61-#x7A] # A-Z | a-z BIT ::= '0' | '1' diff --git a/examples/abnf/abnf.peg.sxp b/examples/abnf/abnf.peg.sxp index 3373889..37fb27c 100644 --- a/examples/abnf/abnf.peg.sxp +++ b/examples/abnf/abnf.peg.sxp @@ -50,7 +50,9 @@ (rule _comment_2 (alt WSP VCHAR)) (terminal quoted_string (seq DQUOTE _quoted_string_1 DQUOTE)) (rule _quoted_string_1 (star _quoted_string_2)) - (terminal _quoted_string_2 (range "#x20-#x21#x23-#x7E")) + (rule _quoted_string_2 (alt _quoted_string_3 _quoted_string_4)) + (terminal _quoted_string_3 (range "#x20-#x21")) + (terminal _quoted_string_4 (range "#x23-#x7E")) (terminal bin_val (seq "b" _bin_val_1 _bin_val_2)) (rule _bin_val_1 (plus BIT)) (rule _bin_val_2 (opt _bin_val_3)) @@ -80,8 +82,12 @@ (rule _hex_val_8 (plus HEXDIG)) (terminal prose_val (seq "<" _prose_val_1 ">")) (rule _prose_val_1 (star _prose_val_2)) - (terminal _prose_val_2 (range "#x20-#x3D#x3F-#x7E")) - (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (rule _prose_val_2 (alt _prose_val_3 _prose_val_4)) + (terminal _prose_val_3 (range "#x20-#x3D")) + (terminal _prose_val_4 (range "#x3F-#x7E")) + (terminal ALPHA (alt _ALPHA_1 _ALPHA_2)) + (terminal _ALPHA_1 (range "#x41-#x5A")) + (terminal _ALPHA_2 (range "#x61-#x7A")) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) @@ -92,8 +98,7 @@ (terminal _CTL_2 (hex "#x7F")) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT _HEXDIG_1)) - (terminal _HEXDIG_1 (range "A-F")) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star _LWSP_1)) diff --git 
a/examples/abnf/abnf.sxp b/examples/abnf/abnf.sxp index 4b96a3e..3fd8590 100644 --- a/examples/abnf/abnf.sxp +++ b/examples/abnf/abnf.sxp @@ -19,14 +19,15 @@ (terminal c_wsp (alt WSP (seq c_nl WSP))) (terminal c_nl (alt COMMENT CRLF)) (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) + (terminal quoted_string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) (terminal dec_val (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) (terminal hex_val (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) - (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) + (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) + (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) @@ -34,7 +35,7 @@ (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT (range "A-F"))) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star (alt WSP (seq CRLF WSP)))) diff --git a/examples/abnf/doc/parser.html b/examples/abnf/doc/parser.html index 5c26cae..c3e79b9 100644 --- a/examples/abnf/doc/parser.html +++ b/examples/abnf/doc/parser.html @@ -667,7 +667,7 @@

EBNF Parser for EBNF.

hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?

-
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value|
+        
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
     if value.include?('.')
@@ -838,6 +838,7 @@

Non-terminal productions

      raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
       parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
     end
+    progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
     sym
   end
@@ -1000,10 +1001,12 @@

Non-terminal productions

-

char_val ::= case_insensitive_string | case_sensitive_string

+

case_insensitive_string ::= "%i"? quoted_string

-
  production(:char_val) do |value|
+
  production(:case_insensitive_string) do |value|
+    str = value.last[:quoted_string]
+    if str.match?(/[[:alpha:]]/)
@@ -1011,10 +1014,13 @@

Non-terminal productions

-

FIXME: need rule logic for insensitive matching of strings

+

Only need to use case-insensitive if there are alphabetic characters in the string.

-
    value.last[:quoted_string]
+        
      [:istr, value.last[:quoted_string]]
+    else
+      value.last[:quoted_string]
+    end
   end
@@ -1023,6 +1029,19 @@

Non-terminal productions

+

case_sensitive_string ::= "%s" quoted_string

+ + +
  production(:case_sensitive_string) do |value|
+    value.last[:quoted_string]
+  end
+ + + + +
+ +

num_val ::= "%" (bin_val | dec_val | hex_val)

@@ -1050,10 +1069,10 @@

Parser invocation.

  def initialize(input, **options, &block)
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -1065,10 +1084,10 @@

Parser invocation.

end
- +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -1078,10 +1097,10 @@

Parser invocation.

@parsed_rules = {}
- +
- +

Parses into @parsed_rules

@@ -1094,10 +1113,10 @@

Parser invocation.

end
- +
- +

The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.

@@ -1107,10 +1126,10 @@

Parser invocation.

  def ast
- +
- +

Add built-in rules for standard ABNF rules not

@@ -1124,10 +1143,10 @@

Parser invocation.

end
- +
- +

Output formatted S-Expression of grammar

@@ -1136,10 +1155,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP) - +
- +

Output rules as a formatted S-Expression

diff --git a/examples/abnf/meta.rb b/examples/abnf/meta.rb index b5ce638..c1e8c8e 100644 --- a/examples/abnf/meta.rb +++ b/examples/abnf/meta.rb @@ -53,7 +53,9 @@ module ABNFMeta EBNF::Rule.new(:_comment_2, nil, [:alt, :WSP, :VCHAR]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:quoted_string, nil, [:seq, :DQUOTE, :_quoted_string_1, :DQUOTE], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_quoted_string_1, nil, [:star, :_quoted_string_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_quoted_string_2, nil, [:range, "#x20-#x21#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_2, nil, [:alt, :_quoted_string_3, :_quoted_string_4]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_3, nil, [:range, "#x20-#x21"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_4, nil, [:range, "#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:bin_val, nil, [:seq, "b", :_bin_val_1, :_bin_val_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_bin_val_1, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_bin_val_2, nil, [:opt, :_bin_val_3]).extend(EBNF::PEG::Rule), @@ -83,8 +85,12 @@ module ABNFMeta EBNF::Rule.new(:_hex_val_8, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:prose_val, nil, [:seq, "<", :_prose_val_1, ">"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_prose_val_1, nil, [:star, :_prose_val_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_prose_val_2, nil, [:range, "#x20-#x3D#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_2, nil, [:alt, :_prose_val_3, :_prose_val_4]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_3, nil, [:range, "#x20-#x3D"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_4, nil, [:range, "#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:ALPHA, nil, [:alt, :_ALPHA_1, :_ALPHA_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ALPHA_1, nil, [:range, "#x41-#x5A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ALPHA_2, nil, [:range, "#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -95,8 +101,7 @@ module ABNFMeta EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, "A", "B", "C", "D", "E", "F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index e30dcd7..f347b9a 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -32,9 +32,8 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" 
(plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" @@ -48,8 +47,8 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal PASS "23" (plus (alt - (range "#x00-#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index a51ccb3..0981538 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -32,9 +32,8 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" @@ -48,8 +47,8 @@ 
This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal PASS "23" (plus (alt - (range "#x00-#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index 2aaa876..535c52b 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -47,15 +47,13 @@ module EBNFPegMeta EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, :_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), @@ -71,12 +69,12 @@ module EBNFPegMeta EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x00-#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#x0A#x0Dx"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: 
:terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md index 957fb6e..2789438 100644 --- a/examples/isoebnf/README.md +++ b/examples/isoebnf/README.md @@ -32,39 +32,50 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) - (rule letter - (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" - "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" - "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" )) - (rule decimal_digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) - (rule integer (seq decimal_digit (star decimal_digit))) - (rule meta_identifier (seq letter (star meta_identifier_character))) - (rule meta_identifier_character (alt letter decimal_digit "_")) - (rule terminal_string + (terminal terminal_string (alt - (seq (seq "'" first_terminal_character (star first_terminal_character) "'")) - (seq (seq "\"" second_terminal_character (star second_terminal_character) "\""))) ) - (rule first_terminal_character (seq terminal_character)) - (rule second_terminal_character (seq terminal_character)) - (rule special_sequence (seq "?" 
(star special_sequence_character) "?")) - (rule special_sequence_character (seq terminal_character)) - (rule terminal_character + (seq "'" (plus first_terminal_character) "'") + (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal integer (plus decimal_digit)) + (terminal special_sequence (seq "?" (star special_sequence_character) "?")) + (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (terminal first_terminal_character (diff terminal_character "'")) + (terminal second_terminal_character (diff terminal_character "\"")) + (terminal special_sequence_character (diff terminal_character "?")) + (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol end_option_symbol end_repeat_symbol except_symbol first_quote_symbol repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (rule other_character - (alt " " ":" "+" "_" "%" "@" "&" "#" "$" "<" ">" "\\" "^" "`" "~")) - (rule empty (seq "")) - (rule defining_symbol (alt "=" ":")) - (rule definition_separator_symbol (alt "|" "/" "!")) - (rule terminator_symbol (alt ";" ".")) - (rule start_option_symbol (alt "[" "(/")) - (rule end_option_symbol (alt "]" "/)")) - (rule start_repeat_symbol (alt "{" "(:")) - (rule end_repeat_symbol (alt "}" ":)"))) + (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (pass _pass (alt (plus gap_separator) comment)) + (terminal empty (seq ())) + 
(terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + (terminal special_sequence_symbol (seq "?"))) This can then be used as input to {EBNF.parse} to transform [EBNF][] to [PEG][] for parsing examples of the grammar using {EBNF::PEG::Parser}. diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf index 90084f1..8bcda08 100644 --- a/examples/isoebnf/examples/iso-ebnf.isoebnf +++ b/examples/isoebnf/examples/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols ’_’ or "_" *); + between the quote symbols '_' or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. 
*); -comment = ’(*’, {comment_symbol}, ’*)’ +comment = '(*', {comment_symbol}, '*)' (* A comment is allowed anywhere outside a , , or *); diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 4ad3ea7..18e98de 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -6,11 +6,11 @@ module EBNF::Terminals HEX = %r(\#x[a-fA-F0-9]+)u.freeze CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze - RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze + RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))\])u.freeze ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze - O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze + O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))\])u.freeze O_ENUM = %r(\[^#{R_CHAR}+\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index c546079..599b109 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -347,18 +347,10 @@ def format_abnf_char(c) # FIXME: O_RANGE def format_abnf_range(string) if string.include?('-') - # Might include multiple ranges - # #x01-#x03#x05-#x06 - # a-bc-d dash = (@options[:html] ? "- " : "-") # Split into separate range segments if string.start_with?('#x') - ranges = [] - scanner = StringScanner.new(string) - while !scanner.eos? 
- ranges << scanner.scan(/#x\h+-#x\h+/) - end - ranges.map {|range|"%x" + range.gsub('#x', '').sub('-', dash)}.join(" / ") + '%x' + string[2..-1].gsub('#x', '') else '%d' + string.gsub(/[^-]/) {|c| c.ord} end @@ -473,55 +465,31 @@ def format_isoebnf(expr, sep: nil, embedded: false) # FIXME: O_RANGE def format_isoebnf_range(string) chars = [] - scanner = StringScanner.new(string) if string.include?('-') - ranges = [] - # Might include multiple ranges - # #x01-#x03#x05-#x06 - # a-bc-d - # Split into separate range segments - if string.start_with?('#x') - while !scanner.eos? - ranges << scanner.scan(/#x\h+-#x\h+/) - end - ranges.each do |range| - first, last = range.split('-').map {|h| h[2..-1].hex.ord} - while first <= last - c = first.chr(Encoding::UTF_8) - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c - first += 1 - end - end + first, last = if string.start_with?('#x') + string.split('-').map {|h| h[2..-1].hex.ord} else - while !scanner.eos? - r = scanner.scan(/.-./) - require 'byebug'; byebug unless r - ranges << r - end - ranges.each do |range| - first, last = range.split('-').map {|c| c.ord} - while first <= last - c = first.chr(Encoding::UTF_8) - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c - first += 1 - end - end + string.split('-').map {|c| c.ord} + end + while first <= last + c = first.chr(Encoding::UTF_8) + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + first += 1 end else + scanner = StringScanner.new(string) while !scanner.eos? 
- c = if hex = scanner.scan(/#x\h+/) - hex[2..-1].hex.ord.chr(Encoding::UTF_8) + c = if h = scanner.scan(/#x\h+/) + h[2..-1].hex.ord.chr(Encoding::UTF_8) else scanner.scan(/./) end + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c end - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c end end From 22883db22fdb6e7874621f70f572ffe3121863f6 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 5 Jul 2020 15:14:01 -0700 Subject: [PATCH 24/50] Tweak R_CHAR terminal to not allow either ']' or '-'. Add better rule validation for ranges, istr and hex. --- bin/ebnf | 2 + etc/ebnf.ebnf | 4 +- etc/ebnf.html | 2 +- etc/ebnf.ll1.sxp | 2 +- etc/ebnf.peg.rb | 3 +- etc/ebnf.peg.sxp | 7 ++- etc/ebnf.sxp | 2 +- lib/ebnf/base.rb | 16 +++-- lib/ebnf/rule.rb | 75 +++++++++++++++++++---- lib/ebnf/terminals.rb | 4 +- spec/examples/ebnf-parser-spec.rb | 46 -------------- spec/rule_spec.rb | 99 +++++++++++++++++++++++++++++-- 12 files changed, 183 insertions(+), 79 deletions(-) delete mode 100644 spec/examples/ebnf-parser-spec.rb diff --git a/bin/ebnf b/bin/ebnf index 0462edc..c813e85 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -32,6 +32,7 @@ OPT_ARGS = [ ["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"], ["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"], ["--namespace", "-n", GetoptLong::REQUIRED_ARGUMENT,"Namespace to use when generating Turtle"], + ["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"], ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] ] def usage @@ -76,6 +77,7 @@ opts.each do |opt, arg| when '--prefix' then options[:prefix] = arg when '--namespace' then options[:namespace] = arg when '--progress' then options[:progress] = true + when '--validate' then options[:validate] = 
true when '--help' then usage end end diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 0d6af57..70baf33 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -38,7 +38,7 @@ [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+ - [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS + [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS # exclusively R_CHAR or HEX [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' @@ -53,7 +53,7 @@ [20] CHAR ::= [#x9#xA#xD] | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - [21] R_CHAR ::= CHAR - ']' + [21] R_CHAR ::= CHAR - (']' | '-') [22] POSTFIX ::= [?*+] diff --git a/etc/ebnf.html b/etc/ebnf.html index e0ee042..0cc61d0 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -176,7 +176,7 @@ R_CHAR ::= -CHAR - "]" +CHAR - ("]" | "-") diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 0f71ae7..4c20739 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -168,7 +168,7 @@ (range "#x20-#xD7FF") (range "#xE000-#xFFFD") (range "#x10000-#x10FFFF")) ) - (terminal R_CHAR "21" (diff CHAR "]")) + (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" (plus diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 915e74e..0c8cda8 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -65,7 +65,8 @@ module Meta EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_R_CHAR_1, "21.1", [:alt, "]", "-"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:PASS, "23", [:plus, 
:_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 1516e43..a1d5e77 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -64,11 +64,12 @@ (terminal _CHAR_2 "20.2" (range "#x20-#xD7FF")) (terminal _CHAR_3 "20.3" (range "#xE000-#xFFFD")) (terminal _CHAR_4 "20.4" (range "#x10000-#x10FFFF")) - (terminal R_CHAR "21" (diff CHAR "]")) + (terminal R_CHAR "21" (diff CHAR _R_CHAR_1)) + (terminal _R_CHAR_1 "21.1" (alt "]" "-")) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" (plus _PASS_1)) - (terminal _PASS_10 "23.10" (star _PASS_11)) (terminal _PASS_1 "23.1" (alt _PASS_2 _PASS_3 _PASS_4 _PASS_5)) + (terminal _PASS_10 "23.10" (star _PASS_11)) (terminal _PASS_11 "23.11" (alt _PASS_12 _PASS_13)) (terminal _PASS_12 "23.12" (opt _PASS_14)) (terminal _PASS_13 "23.13" (range "^*")) @@ -78,8 +79,8 @@ (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) (terminal _PASS_18 "23.18" (opt _PASS_20)) (terminal _PASS_19 "23.19" (range "^*")) - (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) + (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) (terminal _PASS_21 "23.21" (range "^)")) (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 87e6d04..59068dc 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -26,7 +26,7 @@ (range "#x20-#xD7FF") (range "#xE000-#xFFFD") (range "#x10000-#x10FFFF")) ) - (terminal R_CHAR "21" (diff CHAR "]")) + (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" (plus diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 0977f3a..4f1762b 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -106,6 +106,8 @@ class Base # @param [Hash{Symbol => Object}] options # @option options [Boolean, 
Array] :debug # Output debug information to an array or $stdout. + # @option options [Boolean, Array] :validate + # Validate resulting grammar. def initialize(input, format: :ebnf, **options) @options = options.dup @lineno, @depth, @errors = 1, 0, [] @@ -115,9 +117,9 @@ def initialize(input, format: :ebnf, **options) input = input.respond_to?(:read) ? input.read : input.to_s case format - when :sxp - require 'sxp' unless defined?(SXP) - @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)} + when :abnf + abnf = ABNF.new(input, **options) + @ast = abnf.ast when :ebnf scanner = StringScanner.new(input) @@ -140,15 +142,17 @@ def initialize(input, format: :ebnf, **options) @ast << rule end end - when :abnf - abnf = ABNF.new(input, **options) - @ast = abnf.ast when :isoebnf iso = ISOEBNF.new(input, **options) @ast = iso.ast + when :sxp + require 'sxp' unless defined?(SXP) + @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)} else raise "unknown input format #{format.inspect}" end + + validate! if @options[:validate] end ## diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index cb73d1b..14d0769 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -1,4 +1,5 @@ require 'scanf' +require 'strscan' module EBNF # Represent individual parsed rules @@ -12,6 +13,20 @@ class Rule hex istr range }.map(&:to_sym).freeze + # The number of arguments expected per operator. `nil` for unspecified + OP_ARGN = { + alt: nil, + diff: 2, + hex: 1, + not: 1, + opt: 1, + plus: 1, + range: 1, + rept: 3, + seq: nil, + star: 1 + } + # Symbol of rule # # @return [Symbol] @@ -66,6 +81,7 @@ class Rule # The expression is an internal-representation of an S-Expression with one of the following oparators: # # * `alt` – A list of alternative rules, which are attempted in order. It terminates with the first matching rule, or is terminated as unmatched, if no such rule is found. + # * `diff` – matches any string that matches `A` but does not match `B`. 
# * `hex` – A single character represented using the hexadecimal notation `#xnn`. # * `istr` – A string which matches in a case-insensitive manner, so that `(istr "fOo")` will match either of the strings `"foo"`, `"FOO"` or any other combination. # * `opt` – An optional rule or terminal. It either results in the matching rule or returns `nil`. @@ -120,7 +136,7 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta raise ArgumentError, "#{@expr.first} operation must an non-negative integer maximum or '*', was #{@expr[2]}" unless @expr[2] == '*' || @expr[2].is_a?(Integer) && @expr[2] >= 0 when :seq - # It's legal to have a zero-lenght sequence + # It's legal to have a zero-length sequence else raise ArgumentError, "Rule expression must be an array using a known operator, was #{@expr.first}" end @@ -431,7 +447,9 @@ def translate_codepoints(str) # * `alt` => this is every non-terminal. # * `diff` => this is every non-terminal. # * `hex` => nil + # * `istr` => nil # * `not` => this is the last expression, if any. + # * `opt` => this is the last expression, if any. # * `plus` => this is the last expression, if any. # * `range` => nil # * `rept` => this is the last expression, if any. @@ -464,7 +482,9 @@ def non_terminals(ast, expr = @expr) # * `alt` => this is every terminal. # * `diff` => this is every terminal. # * `hex` => nil + # * `istr` => nil # * `not` => this is the last expression, if any. + # * `opt` => this is the last expression, if any. # * `plus` => this is the last expression, if any. # * `range` => nil # * `rept` => this is the last expression, if any. @@ -540,17 +560,52 @@ def starts_with?(sym) # Typically, if the expression is recursive, the embedded expression is called recursively. # @raise [RangeError] def validate!(ast, expr = @expr) - ([:alt, :diff].include?(expr.first) ? 
expr[1..-1] : expr[1,1]).each do |sym| - case sym - when Symbol - r = ast.detect {|r| r.sym == sym} - raise SyntaxError, "No rule found for #{sym}" unless r - when Array - validate!(ast, sym) + op = expr.first + raise SyntaxError, "Unknown operator: #{op}" unless OP_ARGN.key?(op) + raise SyntaxError, "Argument count missmatch on operator #{op}, had #{expr.length - 1} expected #{OP_ARGN[op]}" if + OP_ARGN[op] && OP_ARGN[op] != expr.length - 1 + + # rept operator needs min and max + if op == :alt + raise SyntaxError, "alt operation must have at least one operand, had #{expr.length - 1}" unless expr.length > 1 + elsif op == :rept + raise SyntaxError, "rept operation must an non-negative integer minimum, was #{expr[1]}" unless + expr[1].is_a?(Integer) && expr[1] >= 0 + raise SyntaxError, "rept operation must an non-negative integer maximum or '*', was #{expr[2]}" unless + expr[2] == '*' || expr[2].is_a?(Integer) && expr[2] >= 0 + end + + case op + when :hex + raise SyntaxError, "Hex operand must be of form '#xN+': #{sym}" unless expr.last.match?(/^#x\h+$/) + when :range + str = expr.last.dup + str = str[1..-1] if str.start_with?('^') + if str.include?('-') + # If range is RANGE or O_RANGE, must be of form R_CHAR-R_CHAR or HEX-HEX + raise SyntaxError, "Range must be of form HEX-HEX or R_CHAR-R_CHAR: was #{str.inspect}" unless + str.match?(/^\^?(?:(?:#{Terminals::HEX}-#{Terminals::HEX})|(?:#{Terminals::R_CHAR}-#{Terminals::R_CHAR}))$/) else - nil + if str.match?(/^#{Terminals::HEX}+$/) + # Okay + elsif str.match?(Terminals::HEX) || !str.match?(/^#{Terminals::R_CHAR}+$/) + # Can't include both CHAR and HEX + raise SyntaxError, "Range must be of form HEX+ or R_CHAR+: was #{str.inspect}" + end end - end.compact + else + ([:alt, :diff].include?(expr.first) ? 
expr[1..-1] : expr[1,1]).each do |sym| + case sym + when Symbol + r = ast.detect {|r| r.sym == sym} + raise SyntaxError, "No rule found for #{sym}" unless r + when Array + validate!(ast, sym) + when String + raise SyntaxError, "String must be of the form CHAR*" unless sym.match?(/^#{Terminals::CHAR}*$/) + end + end + end end ## diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 18e98de..295fecd 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -3,9 +3,9 @@ module EBNF::Terminals SYMBOL_BASE = %r(\b[a-zA-Z0-9_\.]+\b)u.freeze SYMBOL = %r(#{SYMBOL_BASE}(?!\s*::=))u.freeze - HEX = %r(\#x[a-fA-F0-9]+)u.freeze + HEX = %r(\#x\h+)u.freeze CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze - R_CHAR = %r([\u0009\u000A\u000D\u0020-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze + R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))\])u.freeze ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze diff --git a/spec/examples/ebnf-parser-spec.rb b/spec/examples/ebnf-parser-spec.rb deleted file mode 100644 index 9cd92f5..0000000 --- a/spec/examples/ebnf-parser-spec.rb +++ /dev/null @@ -1,46 +0,0 @@ -# coding: utf-8 -require 'spec_helper' -$:.unshift(File.expand_path("../../../examples/ebnf-parser", __FILE__)) -require 'parser' - -describe EBNFParser do - describe ".new" do - { - %{[2] Prolog ::= BaseDecl? 
PrefixDecl*} => - %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}, - %{ - @terminals - [3] terminal ::= [A-Z_]+ - } => %{((terminal terminal "3" (plus (range "A-Z_"))))}, - %{ - [9] primary ::= HEX - | RANGE - | ENUM - | O_RANGE - | O_ENUM - | STRING1 - | STRING2 - | '(' expression ')' - - } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))}, - %{#[1] rule ::= 'FOO'} => %{()}, - %{//[1] rule ::= 'FOO'} => %{()}, - %{[18] SolutionModifier ::= _SolutionModifier_1 _SolutionModifier_2} => - %{((rule SolutionModifier "18" (seq _SolutionModifier_1 _SolutionModifier_2)))}, - %{[18.1] _SolutionModifier_1 ::= _empty | GroupClause} => - %{((rule _SolutionModifier_1 "18.1" (alt _empty GroupClause)))}, - %q{[18] STRING1 ::= '"' ((CHAR - '"') | '\\t')* '"'} => - %q{((terminal STRING1 "18" (seq "\"" (star (alt (diff CHAR "\"") "\t")) "\"")))} - }.each do |input, expected| - it "parses #{input.inspect}" do - expect(parse(input, validate: true).ast.to_sxp).to produce(expected, @debug) - end - end - end - - def parse(value, **options) - @debug = [] - options = {debug: @debug}.merge(options) - EBNFParser.new(value, **options) - end -end \ No newline at end of file diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 9ee047c..792c6e5 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -21,8 +21,8 @@ EBNF::Rule.new(:ebnf, "1", [:star, [:alt, :declaration, :rule]], kind: :rule) ], "pass": [ - %{(pass _pass (plus (range "#x20\\\\t\\\\r\\\\n")))}, - EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass) + %{(pass _pass (plus (range "#x9#xA#xD#x20")))}, + EBNF::Rule.new(nil, nil, [:plus, [:range, "#x9#xA#xD#x20"]], kind: :pass) ], "terminal": [ %{(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))}, @@ -82,6 +82,8 @@ "diff (empty)": %{(terminal R_CHAR "21" (diff))}, "diff (one)": %{(terminal R_CHAR "21" (diff CHAR))}, "diff (three)": %{(terminal R_CHAR "21" (diff CHAR "]" ","))}, + "hex (empty)": 
%{(terminal hex (hex))}, + "hex (two)": %{(terminal hex (hex #x01 #x02))}, "istr (empty)": %{(terminal nc (istr))}, "istr (two)": %{(terminal nc (istr "foo" "bar"))}, "not (empty)": %{(rule _a_1 "n.1" (not))}, @@ -919,7 +921,7 @@ STRING1: ['"'], STRING2: ["'"], CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"], - R_CHAR: [:CHAR, "]"], + R_CHAR: [:CHAR, "]", "-"], POSTFIX: ["?*+"], PASS: ["#x9#xA#xD#x20", "#", "#x", "//", "/*", "(*"] }.each do |sym, expected| @@ -964,9 +966,94 @@ end describe "#validate!" do - subject {EBNF.parse("a ::= b")} - it "notes missing rule" do - expect {subject.ast.first.validate!(subject.ast)}.to raise_error SyntaxError, "No rule found for b" + let(:gram) {EBNF.parse("a ::= 'b'?")} + subject {gram.ast.first} + + { + "missing rule": [ + "a ::= b", + "No rule found for b" + ], + "illegal string": [ + %{a ::= "\u{01}"}, + 'String must be of the form CHAR*' + ], + "empty range": [ + "a ::= []", + /Range must be of form HEX\+ or R_CHAR\+/ + ], + "mixed enum char and hex": [ + "a ::= [b#x20]", + %(Range must be of form HEX+ or R_CHAR+: was "b#x20") + ], + "mixed enum char and hex (2)": [ + "a ::= [#x20z]", + %(Range must be of form HEX+ or R_CHAR+: was "#x20z") + ], + "mixed range char and hex": [ + "a ::= [b-#x20]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "b-#x20") + ], + "mixed range char and hex (2)": [ + "a ::= [#x20-b]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "#x20-b") + ], + "incomplete range": [ + "a ::= [a-]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-") + ], + "incomplete range (2)": [ + "a ::= [-b]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "-b") + ], + "extra range": [ + "a ::= [a-b-c]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-b-c") + ], + "extra range (2)": [ + "a ::= [a-zA-Z]", + %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-zA-Z") + ], + }.each do |name, (rule, message)| + it name do + gram = EBNF.parse(rule) + rule 
= gram.ast.first + expect {rule.validate!(gram.ast)}.to raise_error SyntaxError, message + end + end + + # Validate rules that can only be created through modification + { + "alt (empty)": [:alt], + "diff (empty)": [:diff], + "diff (one)": [:diff, 'A'], + "diff (three)": [:diff, 'A', 'B', 'C'], + "hex (empty)": [:hex], + "hex (two)": [:hex, '#x01', '#x02'], + "hex (string)": [:hex, 'string'], + "istr (empty)": [:istr], + "istr (two)": [:istr, 'A', 'B'], + "not (empty)": [:not], + "not (two)": [:not, 'A', 'B'], + "opt (empty)": [:opt], + "plus (empty)": [:plus], + "plus (two)": [:plus, 'A', 'B'], + "rept (empty)": [:rept], + "rept (one)": [:rept, 1], + "rept (two)": [:rept, 1, 2], + "rept (four)": [:rept, 1, 2, 'A', 'B'], + "rept (float min)": [:rept, 1.1, 2, 'A'], + "rept (negative min)": [:rept, -1, 2, 'A'], + "rept (float max)": [:rept, 1, 2.1, 'A'], + "rept (negative max)": [:rept, 1, -1, 'A'], + "star (empty)": [:star], + "star (two)": [:star, 'A', 'B'], + "not op": [:bad] + }.each do |title, expr| + it title do + subject.expr = expr + expect {subject.validate!(gram.ast)}.to raise_error(SyntaxError) + end end end From ca47b72b6bbd2c1ce1f4d823145a6ce2797dac36 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 6 Jul 2020 15:25:00 -0700 Subject: [PATCH 25/50] Improve ABNF capture of bin/dec/hex data. 
--- etc/abnf.sxp | 58 ++++++++++++++++++++--------------------- examples/abnf/parser.rb | 43 +++++++++++++++++++++++++----- lib/ebnf/abnf.rb | 45 ++++++++++++++++++++++++++------ 3 files changed, 102 insertions(+), 44 deletions(-) diff --git a/etc/abnf.sxp b/etc/abnf.sxp index ba8d642..2ccf6ed 100644 --- a/etc/abnf.sxp +++ b/etc/abnf.sxp @@ -1,32 +1,4 @@ ( - (terminal WSP (alt SP HTAB)) - (rule rule (seq rulename defined_as elements c_nl)) - (rule elements (seq alternation (star c_wsp))) - (rule alternation - (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) - (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) - (rule repetition (seq (opt repeat) element)) - (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) - (rule element (alt rulename group option char_val num_val prose_val)) - (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) - (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) - (rule char_val (alt case_insensitive_string case_sensitive_string)) - (rule case_insensitive_string (seq (opt "%i") quoted_string)) - (rule case_sensitive_string (seq "%s" quoted_string)) - (rule num_val (seq "%" (alt bin_val dec_val hex_val))) - (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) - (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) - (terminal c_wsp (alt WSP (seq c_nl WSP))) - (terminal c_nl (alt COMMENT CRLF)) - (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (terminal quoted_string - (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) - (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) - (terminal dec_val - (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) - (terminal hex_val - (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) @@ -42,4 +14,32 @@ (terminal OCTET (range "#x00-#xFF")) (terminal SP (hex "#x20")) (terminal VCHAR (range "#x21-#x7E")) - (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))) + (terminal WSP (alt SP HTAB)) + (rule alternation + (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) + (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (terminal c_nl (alt COMMENT CRLF)) + (terminal c_wsp (alt WSP (seq c_nl WSP))) + (rule case_insensitive_string (seq (opt "%i") quoted_string)) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) + (terminal dec_val + (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) + (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) + (rule element (alt rulename group option char_val num_val prose_val)) + (rule elements (seq alternation (star c_wsp))) + (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) + (terminal hex_val + (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) + (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) + (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) + (terminal quoted_string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) + (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) + (rule repetition (seq (opt repeat) element)) + (rule rule (seq rulename defined_as elements c_nl)) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) + (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-"))))) diff --git a/examples/abnf/parser.rb b/examples/abnf/parser.rb index ecccd4d..b94ae54 100644 --- a/examples/abnf/parser.rb +++ b/examples/abnf/parser.rb @@ -46,8 +46,8 @@ class ABNFParser # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?` terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value| if value.include?('.') - # Interpret segments in binary creating a string - value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("") + # Interpret segments in binary creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=2).chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")] @@ -60,8 +60,8 @@ class ABNFParser # `dec_val ::= "d" DIGIT+ (("." 
DIGIT+)+ | ("-" DIGIT+))?` terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value| if value.include?('.') - # Interpret segments in decimal creating a string - value[1..-1].split('.').map {|d| d.to_i.chr}.join("") + # Interpret segments in decimal creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")] @@ -74,8 +74,8 @@ class ABNFParser # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?` terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value| if value.include?('.') - # Interpret segments in hexadecimal creating a string - value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("") + # Interpret segments in hexadecimal creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=16).chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")] @@ -265,7 +265,7 @@ def ast # Add built-in rules for standard ABNF rules not parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym| rule = ABNFCore::RULES.detect {|r| r.sym == sym} - parsed_rules[sym] ||= rule + parsed_rules[sym] ||= rule if rule end parsed_rules.values @@ -277,4 +277,33 @@ def to_sxp # Output rules as a formatted S-Expression SXP::Generator.string(ast.map(&:for_sxp)) end + +private + # Generate a combination of seq and string to represent a sequence of characters + # + # @param [Array] characters + # @return [String,Array] + def hex_or_string(characters) + seq = [:seq] + str_result = "" + characters.each do |c| + if VCHAR.match?(c) + str_result << c + else + if str_result.length > 0 + seq << str_result + str_result = "" + end + seq << [:hex, "#x%x" % c.hex] + end + end + seq << str_result if str_result.length > 0 + + 
# Either return the sequence, or a string + if seq.length == 2 && seq.last.is_a?(String) + seq.last + else + seq + end + end end diff --git a/lib/ebnf/abnf.rb b/lib/ebnf/abnf.rb index 2eaa8b7..be98fdd 100644 --- a/lib/ebnf/abnf.rb +++ b/lib/ebnf/abnf.rb @@ -41,8 +41,8 @@ class ABNF # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?` terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value| if value.include?('.') - # Interpret segments in binary creating a string - value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("") + # Interpret segments in binary creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=2).chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")] @@ -55,8 +55,8 @@ class ABNF # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?` terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value| if value.include?('.') - # Interpret segments in decimal creating a string - value[1..-1].split('.').map {|d| d.to_i.chr}.join("") + # Interpret segments in decimal creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")] @@ -69,8 +69,8 @@ class ABNF # `hex_val ::= "x" HEXDIG+ (("." 
HEXDIG+)+ | ("-" HEXDIG+))?` terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value| if value.include?('.') - # Interpret segments in hexadecimal creating a string - value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("") + # Interpret segments in hexadecimal creating a sequence of hex characters or a string + hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=16).chr(Encoding::UTF_8)}) elsif value.include?('-') # Interpret as a range [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")] @@ -105,7 +105,7 @@ class ABNF # append to rule alternate rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"} rule.expr = [:alt, rule.expr] unless rule.alt? - if elements.first == :alt + if elements.is_a?(Array) && elements.first == :alt # append alternatives to rule rule.expr.concat(elements[1..-1]) else @@ -260,10 +260,39 @@ def ast # Add built-in rules for standard ABNF rules not parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym| rule = ABNFCore::RULES.detect {|r| r.sym == sym} - parsed_rules[sym] ||= rule + parsed_rules[sym] ||= rule if rule end parsed_rules.values end + + private + # Generate a combination of seq and string to represent a sequence of characters + # + # @param [Array] characters + # @return [String,Array] + def hex_or_string(characters) + seq = [:seq] + str_result = "" + characters.each do |c| + if VCHAR.match?(c) + str_result << c + else + if str_result.length > 0 + seq << str_result + str_result = "" + end + seq << [:hex, "#x%x" % c.codepoints.first] + end + end + seq << str_result if str_result.length > 0 + + # Either return the sequence, or a string + if seq.length == 2 && seq.last.is_a?(String) + seq.last + else + seq + end + end end end \ No newline at end of file From 894415169c388b99df9539a71ff8f705f38107c2 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 6 Jul 2020 15:25:52 -0700 Subject: [PATCH 26/50] Be more consistent when sorting rules and 
don't sort SXP output by default. --- etc/ebnf.html | 2 +- etc/ebnf.ll1.sxp | 2 +- etc/ebnf.peg.sxp | 6 +-- etc/ebnf.sxp | 4 +- etc/iso-ebnf.sxp | 86 ++++++++++++++++++++--------------------- lib/ebnf/base.rb | 2 +- spec/ll1/data/parser.rb | 2 +- spec/ll1_spec.rb | 12 +++--- spec/peg/data/parser.rb | 2 +- 9 files changed, 59 insertions(+), 59 deletions(-) diff --git a/etc/ebnf.html b/etc/ebnf.html index 0cc61d0..7ff0734 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -196,7 +196,7 @@ - + @pass diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 4c20739..d0fa4ef 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -1,6 +1,5 @@ ( (rule _empty "0" (first _eps) (seq)) - (pass _pass (seq PASS)) (rule ebnf "1" (start #t) (first "@pass" "@terminals" LHS _eps) @@ -60,6 +59,7 @@ (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") (seq seq)) + (pass _pass (seq PASS)) (rule seq "6" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index a1d5e77..87df021 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -1,5 +1,4 @@ ( - (pass _pass (seq PASS)) (rule ebnf "1" (star _ebnf_1)) (rule _ebnf_1 "1.1" (alt declaration rule)) (rule declaration "2" (alt "@terminals" pass)) @@ -79,8 +78,8 @@ (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) (terminal _PASS_18 "23.18" (opt _PASS_20)) (terminal _PASS_19 "23.19" (range "^*")) - (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) + (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_21 "23.21" (range "^)")) (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) @@ -88,4 +87,5 @@ (terminal _PASS_6 "23.6" (alt _PASS_8 "//")) (terminal _PASS_7 "23.7" (star _PASS_9)) (terminal _PASS_8 "23.8" (diff "#" "#x")) - (terminal _PASS_9 "23.9" (range "^#xA#xD"))) + 
(terminal _PASS_9 "23.9" (range "^#xA#xD")) + (pass _pass (seq PASS))) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 59068dc..fb20766 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -1,5 +1,4 @@ ( - (pass _pass (seq PASS)) (rule ebnf "1" (star (alt declaration rule))) (rule declaration "2" (alt "@terminals" pass)) (rule rule "3" (seq LHS expression)) @@ -34,4 +33,5 @@ (range "#x9#xA#xD#x20") (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") - (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) + (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) + (pass _pass (seq PASS))) diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 3b89954..24fd9d4 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -1,36 +1,53 @@ ( - (terminal special_sequence_symbol (seq "?")) - (rule syntax_rule - (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (pass _pass (alt (plus gap_separator) comment)) + (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal concatenate_symbol (seq ",")) + (terminal decimal_digit (range "0-9")) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) (rule definitions_list (seq single_definition (star (seq definition_separator_symbol definitions_list)))) - (rule single_definition (seq term (star (seq "," term)))) - (rule term (seq factor (opt (seq "-" exception)))) + (terminal empty (seq ())) + (terminal end_comment_symbol (seq "*)")) + (terminal end_group_symbol (seq ")")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal except_symbol (seq "-")) (rule exception (seq factor)) (rule factor (seq (opt (seq integer "*")) primary)) + (terminal first_quote_symbol (seq "'")) + (terminal 
first_terminal_character (diff terminal_character "'")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal integer (plus decimal_digit)) + (terminal letter (alt (range "a-z") (range "A-Z"))) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (rule optional_sequence + (seq start_option_symbol definitions_list end_option_symbol)) + (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (rule primary (alt optional_sequence repeated_sequence special_sequence grouped_sequence meta_identifier terminal_string empty )) - (rule optional_sequence - (seq start_option_symbol definitions_list end_option_symbol)) (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) - (rule grouped_sequence (seq "(" definitions_list ")")) - (terminal terminal_string - (alt - (seq "'" (plus first_terminal_character) "'") - (seq "\"" (plus second_terminal_character) "\"")) ) - (terminal meta_identifier (seq letter (star meta_identifier_character))) - (terminal integer (plus decimal_digit)) - (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) - (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) - (terminal letter (alt (range "a-z") (range "A-Z"))) - (terminal decimal_digit (range "0-9")) - (terminal meta_identifier_character (alt letter decimal_digit "_")) - (terminal first_terminal_character (diff terminal_character "'")) + (terminal repetition_symbol (seq "*")) + (terminal second_quote_symbol (seq "\"")) (terminal second_terminal_character (diff terminal_character "\"")) + (rule single_definition (seq term (star (seq "," term)))) + (terminal special_sequence (seq "?" (star special_sequence_character) "?")) (terminal special_sequence_character (diff terminal_character "?")) + (terminal special_sequence_symbol (seq "?")) + (terminal start_comment_symbol (seq "(*")) + (terminal start_group_symbol (seq "(")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal start_repeat_symbol (alt "{" "(:")) + (rule syntax (star syntax_rule)) + (rule syntax_rule + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) + (rule term (seq factor (opt (seq "-" exception)))) (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol @@ -38,25 +55,8 @@ repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) - (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) - (pass _pass (alt (plus gap_separator) comment)) - (terminal empty (seq ())) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal 
end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) - (terminal repetition_symbol (seq "*")) - (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) - (terminal first_quote_symbol (seq "'")) - (terminal second_quote_symbol (seq "\"")) - (terminal start_comment_symbol (seq "(*")) - (terminal end_comment_symbol (seq "*)")) - (terminal start_group_symbol (seq "(")) - (terminal end_group_symbol (seq ")")) - (rule syntax (star syntax_rule))) + (terminal terminal_string + (alt + (seq "'" (plus first_terminal_character) "'") + (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal terminator_symbol (alt ";" "."))) diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 4f1762b..e3dcc5a 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -198,7 +198,7 @@ def each(kind, &block) # @return [String] def to_sxp require 'sxp' unless defined?(SXP) - SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp)) + SXP::Generator.string(ast.map(&:for_sxp)) end ## diff --git a/spec/ll1/data/parser.rb b/spec/ll1/data/parser.rb index 86f55d2..954eccb 100644 --- a/spec/ll1/data/parser.rb +++ b/spec/ll1/data/parser.rb @@ -229,6 +229,6 @@ def initialize(input, **options, &block) def to_sxp require 'sxp' unless defined?(SXP) # Output rules as a formatted S-Expression - SXP::Generator.string(@ast.sort_by{|r| r.id.to_f}.map(&:for_sxp)) + SXP::Generator.string(@ast.map(&:for_sxp)) end end diff --git a/spec/ll1_spec.rb b/spec/ll1_spec.rb index 94c979c..7dbac19 100644 --- a/spec/ll1_spec.rb +++ b/spec/ll1_spec.rb @@ -86,12 +86,12 @@ [18] IRIREF ::= '<' ("range" | UCHAR)* '>' [29t] SPARQL_BASE ::= [Bb][Aa][Ss][Ee] }, - %{ - ((rule _empty "0" (first _eps) (seq)) - (terminal IRIREF "18" (seq "<" (star (alt "range" UCHAR)) ">")) - (rule sparqlBase "29s" (first SPARQL_BASE) (seq SPARQL_BASE 
IRIREF)) - (rule _sparqlBase_1 "29s.1" (first IRIREF) (seq IRIREF)) - (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))) + %{( + (rule _empty "0" (first _eps) (seq)) + (terminal IRIREF "18" (seq "<" (star (alt "range" UCHAR)) ">")) + (rule sparqlBase "29s" (first SPARQL_BASE) (seq SPARQL_BASE IRIREF)) + (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee"))) + (rule _sparqlBase_1 "29s.1" (first IRIREF) (seq IRIREF))) }, nil ], "declaration (FF.1)" => [ diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index c426e66..0cb8a99 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -170,6 +170,6 @@ def initialize(input, **options, &block) def to_sxp require 'sxp' unless defined?(SXP) # Output rules as a formatted S-Expression - SXP::Generator.string(@ast.sort_by{|r| r.id.to_f}.map(&:for_sxp)) + SXP::Generator.string(@ast.map(&:for_sxp)) end end From 84ab6a8c98182a3677e785492bc9e7094fca2a2f Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 6 Jul 2020 15:26:42 -0700 Subject: [PATCH 27/50] Add ABNF parser spec. --- README.md | 6 +- lib/ebnf/rule.rb | 11 +- lib/ebnf/writer.rb | 2 +- spec/abnf_spec.rb | 245 ++++++++++++++++++++++++++++++++++++++++++++ spec/parser_spec.rb | 4 +- 5 files changed, 261 insertions(+), 7 deletions(-) create mode 100644 spec/abnf_spec.rb diff --git a/README.md b/README.md index f4cfacb..85bc45a 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,8 @@ On a parsing failure, and exception is raised with information that may be usefu The [EBNF][] variant used here is based on [W3C](https://w3.org/) [EBNF][] (see {file:etc/ebnf.ebnf EBNF grammar}) as defined in the [XML 1.0 recommendation](https://www.w3.org/TR/REC-xml/), with minor extensions: +The character set for EBNF is UTF-8. 
+ The general form of a rule is: symbol ::= expression @@ -121,9 +123,9 @@ Within the expression on the right-hand side of a rule, the following expression [abc], [#xN#xN#xN] matches any Char with a value among the characters enumerated. Enumerations and ranges can be mixed in one set of brackets. [^a-z], [^#xN-#xN] - matches any Char with a value outside the range indicated. + matches any UTF-8 Char with a value outside the range indicated. [^abc], [^#xN#xN#xN] - matches any Char with a value not among the characters given. Enumerations and ranges of forbidden values can be mixed in one set of brackets. + matches any UTF-8 Char with a value not among the characters given. Enumerations and ranges of forbidden values can be mixed in one set of brackets. "string" matches a literal string matching that given inside the double quotes. 'string' diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index 14d0769..3b8c37a 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -18,6 +18,7 @@ class Rule alt: nil, diff: 2, hex: 1, + istr: 1, not: 1, opt: 1, plus: 1, @@ -429,10 +430,14 @@ def eql?(other) # Rules compare using their ids def <=>(other) - if id.to_i == other.id.to_i - id.to_s <=> other.id.to_s + if id && other.id + if id == other.id + id.to_s <=> other.id.to_s + else + id.to_f <=> other.id.to_f + end else - id.to_i <=> other.id.to_i + sym.to_s <=> other.sym.to_s end end diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 599b109..f58b4de 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -499,7 +499,7 @@ def format_isoebnf_range(string) - rules.each do |rule| %tr{id: "grammar-production-#{rule.sym}"} - if rule.pass? - %td{colspan: (format == :ebnf && rule.id ? 3 : 2)} + %td{colspan: (format == :ebnf && rule.id ? 
4 : 3)} %code<="@pass" - else - if format == :ebnf && rule.id diff --git a/spec/abnf_spec.rb b/spec/abnf_spec.rb new file mode 100644 index 0000000..115bcd6 --- /dev/null +++ b/spec/abnf_spec.rb @@ -0,0 +1,245 @@ +# coding: utf-8 +$:.unshift "." +require 'spec_helper' +require 'ebnf' +require 'sxp' + +describe EBNF::ABNF do + let(:logger) {RDF::Spec.logger} + after(:each) do |example| + puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) + end + + context "rule variations" do + { + "legal rule name": [ + 'rulename = "foo"', + %{((terminal rulename (istr "foo")))} + ], + "binary character": [ + "bin = %b11", + %{((terminal bin (hex "#x3")))} + ], + "binary string": [ + "bin = %b1.10.11", + %{((rule bin (seq (hex "#x1") (hex "#x2") (hex "#x3"))))} + ], + "binary string (ascii range)": [ + "bin = %b1100010.1101001.1101110", + %{((rule bin (seq "bin")))} + ], + "binary string (mixed range)": [ + "bin = %b1100010.1.1101110", + %{((rule bin (seq "b" (hex "#x1") "n")))} + ], + "decimal char": [ + "dec = %d22", + %{((terminal dec (hex "#x16")))} + ], + "decimal string": [ + "dec = %d1.2.3", + %{((rule dec (seq (hex "#x1") (hex "#x2") (hex "#x3"))))} + ], + "decimal string (ascii range)": [ + "dec = %d100.101.99", + %{((rule dec (seq "dec")))} + ], + "decimal string (mixed range)": [ + "dec = %d100.1.99", + %{((rule dec (seq "d" (hex "#x1") "c")))} + ], + "decimal range": [ + "dec = %d22-40", + %{((terminal dec (range "#x16-#x28")))} + ], + "hex character": [ + "hex = %x1f", + %{((terminal hex (hex "#x1f")))} + ], + "hex string": [ + "hex = %x1.a.c", + %{((rule hex (seq (hex "#x1") (hex "#xa") (hex "#xc"))))} + ], + "hex string (ascii range)": [ + "hex = %x68.65.78", + %{((rule hex (seq "hex")))} + ], + "hex string (mixed range)": [ + "hex = %x68.1.78", + %{((rule hex (seq "h" (hex "#x1") "x")))} + ], + "hex range": [ + "hex = %x22-40", + %{((terminal hex (range "#x22-#x40")))} + ], + "aliteration": [ + %(baz = foo 
/ bar), + %{((rule baz (alt foo bar)))} + ], + "incremental alternatives": [ + %(ruleset = alt1 / alt2\nruleset =/ alt3\nruleset =/ alt4 / alt5), + %{((rule ruleset (alt alt1 alt2 alt3 alt4 alt5)))} + ], + "concatenated chars and ranges": [ + %(char-line = %x0D.0A %x20-7E %x0D.0A), + %{((rule char-line (seq (seq (hex "#xd") (hex "#xa")) (range "#x20-#x7e") (seq (hex "#xd") (hex "#xa")))))} + ], + "sequence group": [ + %(sequence-group = elem (foo / bar) blat), + %{((rule sequence-group (seq elem (alt foo bar) blat)))} + ], + "rept *": [ + %(rept = *A), + %{((rule rept (star A)))} + ], + "rept 0*": [ + %(rept = 0*A), + %{((rule rept (star A)))} + ], + "rept 1*": [ + %(rept = 1*A), + %{((rule rept (plus A)))} + ], + "rept 2*": [ + %(rept = 2*A), + %{((rule rept (rept 2 "*" A)))} + ], + "rept *1": [ + %(rept = *1A), + %{((rule rept (rept 0 1 A)))} + ], + "rept 0*2": [ + %(rept = 0*2A), + %{((rule rept (rept 0 2 A)))} + ], + "rept 1*3": [ + %(rept = 1*3A), + %{((rule rept (rept 1 3 A)))} + ], + "rept 3": [ + %(rept = 3A), + %{((rule rept (rept 3 3 A)))} + ], + "opt": [ + %(opt = [foo bar]), + %{((rule opt (opt (seq foo bar))))} + ], + "comment": [ + %(foo = %x61 ; a), + %{((terminal foo (hex "#x61")))} + ], + "prose-value": [ + %(prose = < free form >), + %{((rule prose (seq "< free form >")))} + ] + }.each do |title, (input, expect)| + it title do + input << "\n" unless input.end_with?("\n") + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + + context "Case-Sensitive String Support in ABNF" do + { + "case insensitive": [ + %(rulename = %i"aBc"), + %{((terminal rulename (istr "aBc")))} + ], + "case sensitive": [ + %(rulename = %s"aBc"), + %{((rule rulename (seq "aBc")))} + ], + }.each do |title, (input, expect)| + it title do + input << "\n" unless input.end_with?("\n") + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + + context "Core Rules" do + { + "ALPHA": [ + "builtin = ALPHA", + %{((rule builtin (seq ALPHA)) 
(terminal ALPHA (range "#x41-#x5A#x61-#x7A")))} + ], + "BIT": [ + "builtin = BIT", + %{((rule builtin (seq BIT)) (terminal BIT (alt "0" "1")))} + ], + "CR": [ + "builtin = CR", + %{((rule builtin (seq CR)) (terminal CR (hex "#x0D")))} + ], + "CRLF": [ + "builtin = CRLF", + %{((rule builtin (seq CRLF)) (terminal CRLF (seq (opt CR) LF)))} + ], + "CTL": [ + "builtin = CTL", + %{((rule builtin (seq CTL)) (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))))} + ], + "DIGIT": [ + "builtin = DIGIT", + %{((rule builtin (seq DIGIT)) (terminal DIGIT (range "#x30-#x39")))} + ], + "DQUOTE": [ + "builtin = DQUOTE", + %{((rule builtin (seq DQUOTE)) (terminal DQUOTE (hex "#x22")))} + ], + "HEXDIG": [ + "builtin = HEXDIG", + %{((rule builtin (seq HEXDIG)) (terminal HEXDIG (alt DIGIT (range "A-F"))))} + ], + "HTAB": [ + "builtin = HTAB", + %{((rule builtin (seq HTAB)) (terminal HTAB (hex "#x09")))} + ], + "LF": [ + "builtin = LF", + %{((rule builtin (seq LF)) (terminal LF (hex "#x0A")))} + ], + "LWSP": [ + "builtin = LWSP", + %{((rule builtin (seq LWSP)) (terminal LWSP (star (alt WSP (seq CRLF WSP)))))} + ], + "WSP": [ + "builtin = WSP", + %{((rule builtin (seq WSP)) (terminal WSP (alt SP HTAB)))} + ], + }.each do |title, (input, expect)| + it title do + input << "\n" unless input.end_with?("\n") + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + + context "illegal syntax" do + { + "illegal rule name": "rule.name = CRLF\n", + "no line ending": "rule.name = CRLF", + "illegal binary": "bin = %b2\n", + "illegal binary range": "bin = %b10-20\n", + "illegal decimal": "dec = %d2f\n", + "illegal decimal range": "dec = %d22-4060-80\n", + "illegal hex": "hex = %x2g\n", + "illegal hex range": "hex = %x22-4060-80\n", + }.each do |title, input| + it title do + expect {parse(input)}.to raise_error(EBNF::PEG::Parser::Error) + end + end + end + + it "parses ABNF grammar" do + gram = parse(File.open(File.expand_path("../../etc/abnf.abnf", __FILE__))) + expect(gram).to 
be_valid + end + + def parse(input, **options) + @debug = [] + EBNF.parse(input, debug: @debug, format: :abnf, **options) + end +end diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 6969e0b..a2e8d8f 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -16,7 +16,9 @@ %{[18] STRING2 ::= "'" (CHAR - "'")* "'"} => %{(terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'"))}, %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => - %{(terminal IRIREF "18" (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))} + %{(terminal IRIREF "18" (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))}, + #%{[xx]minimal::=whitespace[yy]whitespace::=PASS} => + # %{(rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl)))}, }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) From 5f4e8ee8ac75bb4c8e55208bad15cf858ad312de Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 6 Jul 2020 18:00:16 -0700 Subject: [PATCH 28/50] Use PEG EBNF parser rather than buggy custom parser. (degrades error reporting). 
--- bin/ebnf | 5 +- etc/sparql.ebnf | 4 +- etc/turtle.ebnf | 10 +- examples/ebnf-ll1-parser/parser.rb | 2 +- examples/ebnf-peg-parser/parser.rb | 4 +- lib/ebnf/abnf.rb | 3 + lib/ebnf/base.rb | 24 +- lib/ebnf/ebnf/meta.rb | 95 +++++ lib/ebnf/isoebnf.rb | 3 + lib/ebnf/parser.rb | 599 +++++++++++++++-------------- lib/ebnf/terminals.rb | 7 +- spec/abnf_spec.rb | 2 +- spec/base_spec.rb | 10 +- spec/ebnf_spec.rb | 4 +- spec/parser_spec.rb | 190 +++++---- spec/rule_spec.rb | 26 +- 16 files changed, 554 insertions(+), 434 deletions(-) create mode 100644 lib/ebnf/ebnf/meta.rb diff --git a/bin/ebnf b/bin/ebnf index c813e85..1300954 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -15,6 +15,7 @@ options = { output_format: :sxp, prefix: "ttl", namespace: "http://www.w3.org/ns/formats/Turtle#", + level: 4 } input, out = nil, STDOUT @@ -55,7 +56,7 @@ opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) opts.each do |opt, arg| case opt - when '--debug' then options[:debug] = true + when '--debug' then options[:level] = 0 when '--bnf' then options[:bnf] = true when '--evaluate' then input = arg when '--input-format' @@ -76,7 +77,7 @@ opts.each do |opt, arg| when '--peg' then options[:peg] = true when '--prefix' then options[:prefix] = arg when '--namespace' then options[:namespace] = arg - when '--progress' then options[:progress] = true + when '--progress' then options[:level] = 1 unless options[:level] == 0 when '--validate' then options[:validate] = true when '--help' then usage end diff --git a/etc/sparql.ebnf b/etc/sparql.ebnf index 1b23a15..ed4faee 100644 --- a/etc/sparql.ebnf +++ b/etc/sparql.ebnf @@ -249,7 +249,7 @@ [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? [143] VAR1 ::= '?' VARNAME [144] VAR2 ::= '$' VARNAME - [145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* + [145] LANGTAG ::= '@' ([a-z] | [A-Z])+ ('-' ([a-z] | [A-Z] | [0-9])+)* [146] INTEGER ::= [0-9]+ [147] DECIMAL ::= [0-9]* '.' [0-9]+ [148] DOUBLE ::= [0-9]+ '.' 
[0-9]* EXPONENT @@ -260,7 +260,7 @@ [152] INTEGER_NEGATIVE ::= '-' INTEGER [153] DECIMAL_NEGATIVE ::= '-' DECIMAL [154] DOUBLE_NEGATIVE ::= '-' DOUBLE - [155] EXPONENT ::= [eE] [+-]? [0-9]+ + [155] EXPONENT ::= [eE] [#x2b#x2d]? [0-9]+ [156] STRING_LITERAL1 ::= "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'" [157] STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"' [158] STRING_LITERAL_LONG1 ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''" diff --git a/etc/turtle.ebnf b/etc/turtle.ebnf index da86a68..6e45726 100644 --- a/etc/turtle.ebnf +++ b/etc/turtle.ebnf @@ -30,11 +30,11 @@ [139s] PNAME_NS ::= PN_PREFIX? ":" [140s] PNAME_LN ::= PNAME_NS PN_LOCAL [141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? -[144s] LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )* -[19] INTEGER ::= [+-]? [0-9]+ -[20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ ) -[21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT ) -[154s] EXPONENT ::= [eE] [+-]? [0-9]+ +[144s] LANGTAG ::= "@" ([a-z] | [A-Z])+ ( "-" ([a-z] | [A-Z] | [0-9])+ )* +[19] INTEGER ::= [#x2b#x2d]? [0-9]+ +[20] DECIMAL ::= [#x2b#x2d]? ( ([0-9])* '.' ([0-9])+ ) +[21] DOUBLE ::= [#x2b#x2d]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT ) +[154s] EXPONENT ::= [eE] [#x2b#x2d]? [0-9]+ [22] STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"' [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ( [^#x27#x5C#xA#xD] | ECHAR | UCHAR )* "'" [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" ( ( "'" | "''" )? 
( [^'\] | ECHAR | UCHAR ) )* "'''" diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index 0ea4ff5..f2c6ce5 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ -67,7 +67,7 @@ def inspect # # [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+ terminal(:HEX, HEX) do |prod, token, input| - input[:terminal] = token.value + input[:terminal] = [:hex, token.value] end # Terminal for `ENUM` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index ac573ac..52c2b90 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -47,7 +47,9 @@ class EBNFPegParser # Match `HEX` terminal # # [13] HEX ::= #x' ([a-f] | [A-F] | [0-9])+ - terminal(:HEX, HEX) + terminal(:HEX, HEX) do |value| + [:hex, value] + end # Terminal for `ENUM` is matched as part of a `primary` rule. # diff --git a/lib/ebnf/abnf.rb b/lib/ebnf/abnf.rb index be98fdd..ac06d8c 100644 --- a/lib/ebnf/abnf.rb +++ b/lib/ebnf/abnf.rb @@ -1,5 +1,6 @@ require_relative 'abnf/core' require_relative 'abnf/meta' +require 'logger' # ABNF parser # Parses ABNF into an array of {EBNF::Rule}. 
@@ -250,6 +251,8 @@ def initialize(input, **options) ABNFMeta::RULES, # PEG rules whitespace: '', # No implicit whitespace **options) + rescue EBNF::PEG::Parser::Error => e + raise SyntaxError, e.message end ## diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index e3dcc5a..93ba533 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -84,7 +84,6 @@ module EBNF class Base include BNF include LL1 - include Parser include PEG # Abstract syntax tree from parse @@ -121,27 +120,8 @@ def initialize(input, format: :ebnf, **options) abnf = ABNF.new(input, **options) @ast = abnf.ast when :ebnf - scanner = StringScanner.new(input) - - eachRule(scanner) do |r| - debug("rule string") {r.inspect} - case r - when /^@terminals/ - # Switch mode to parsing terminals - terminal = true - when /^@pass\s*(.*)$/m - expr = expression($1).first - rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self) - rule.orig = expr - @ast << rule - else - rule = depth {ruleParts(r)} - - rule.kind = :terminal if terminal # Override after we've parsed @terminals - rule.orig = r - @ast << rule - end - end + ebnf = Parser.new(input, **options) + @ast = ebnf.ast when :isoebnf iso = ISOEBNF.new(input, **options) @ast = iso.ast diff --git a/lib/ebnf/ebnf/meta.rb b/lib/ebnf/ebnf/meta.rb new file mode 100644 index 0000000..209136a --- /dev/null +++ b/lib/ebnf/ebnf/meta.rb @@ -0,0 +1,95 @@ +# This file is automatically generated by ebnf version 2.0.0 +# Derived from ../etc/ebnf.ebnf +module EBNFMeta + RULES = [ + EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:declaration, "2", [:alt, "@terminals", :pass]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:rule, "3", [:seq, :LHS, :expression]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:expression, "4", [:seq, :alt]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:alt, "5", [:seq, :seq, :_alt_1]).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_alt_1, "5.1", [:star, :_alt_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_alt_2, "5.2", [:seq, "|", :seq]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:seq, "6", [:plus, :diff]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:diff, "7", [:seq, :postfix, :_diff_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_diff_1, "7.1", [:opt, :_diff_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_4, "11.4", [:plus, " "], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_LHS_2, "11.2", [:star, " "], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:SYMBOL, "12", [:plus, :_SYMBOL_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_SYMBOL_1, "12.1", [:alt, :_SYMBOL_2, :_SYMBOL_3, :_SYMBOL_4, "_", "."], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_SYMBOL_2, "12.2", [:range, "a-z"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_SYMBOL_3, "12.3", [:range, "A-Z"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_SYMBOL_4, "12.4", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEX, "13", 
[:seq, "#x", :_HEX_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEX_1, "13.1", [:plus, :_HEX_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEX_2, "13.2", [:alt, :_HEX_3, :_HEX_4, :_HEX_5], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_1, "14.1", [:alt, :_ENUM_2, :_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_2, "14.2", [:seq, "[", :_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_3, "14.3", [:seq, :_ENUM_5, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_ENUM, "15", [:alt, :_O_ENUM_1, :_O_ENUM_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_1, "15.1", [:seq, "[^", :_O_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "16.3", 
[:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, :_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING2, "19", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_1, "19.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_2, "19.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CHAR, "20", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_1, "20.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, 
:_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_12, "23.12", [:opt, :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_14, "23.14", [:seq, "*", :_PASS_15], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_15, "23.15", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_13, "23.13", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_5, "23.5", [:seq, "(*", :_PASS_16, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_16, "23.16", [:star, :_PASS_17], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_17, "23.17", [:alt, :_PASS_18, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_18, "23.18", [:opt, :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), + ] +end + diff --git a/lib/ebnf/isoebnf.rb b/lib/ebnf/isoebnf.rb index 98661e2..36fe4c3 100644 --- a/lib/ebnf/isoebnf.rb +++ b/lib/ebnf/isoebnf.rb @@ -1,4 +1,5 @@ require_relative 'isoebnf/meta' +require 'logger' # ABNF parser # Parses ABNF into an array of {EBNF::Rule}. @@ -221,6 +222,8 @@ def initialize(input, **options, &block) end @ast << rule if rule end + rescue EBNF::PEG::Parser::Error => e + raise SyntaxError, e.message end end end diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index 360b2c8..62d4460 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -1,318 +1,321 @@ +require_relative 'ebnf/meta' +require 'logger' + module EBNF - module Parser - ## - # Iterate over rule strings. - # a line that starts with '\[' or '@' starts a new rule - # - # @param [StringScanner] scanner - # @yield rule_string - # @yieldparam [String] rule_string - def eachRule(scanner) - cur_lineno = 1 - r = '' - until scanner.eos? - case - when s = scanner.scan(%r(\s+)m) - # Eat whitespace - cur_lineno += s.count("\n") - #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" } - when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m) - # Eat comments /* .. */ - cur_lineno += s.count("\n") - debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } - when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m) - # Eat comments (* .. 
*) - cur_lineno += s.count("\n") - debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } - when s = scanner.scan(%r((#(?!x)|//).*$)) - # Eat comments // & # - cur_lineno += s.count("\n") - debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } - when s = scanner.scan(/\A["']/) - # Found a quote, scan until end of matching quote - s += scanner.scan_until(/#{scanner.matched}|$/) - r += s - when s = scanner.scan(%r(^@terminals)) - #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" } - yield(r) unless r.empty? - @lineno = cur_lineno - yield(s) - r = '' - when s = scanner.scan(/@pass/) - # Found rule start, if we've already collected a rule, yield it - #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" } - yield r unless r.empty? - @lineno = cur_lineno - r = s - when s = scanner.scan(EBNF::Terminals::LHS) - # Found rule start, if we've already collected a rule, yield it - yield r unless r.empty? - #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" } - @lineno = cur_lineno - r = s - else - # Collect until end of line, or start of comment or quote - s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$}) - if scanner.matched.length > 0 - # Back up scan head before ending match - scanner.pos = scanner.pos - scanner.matched.length + class Parser + include EBNF::PEG::Parser + include EBNF::Terminals - # Remove matched from end of string - s = s[0..-(scanner.matched.length+1)] - end - cur_lineno += s.count("\n") - #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" } - r += s - end - end - yield r unless r.empty? + # Abstract syntax tree from parse + # + # @return [Array] + attr_reader :ast + + # ## Terminals + # Define rules for Terminals, placing results on the input stack, making them available to upstream non-Terminal rules. + # + # Terminals are defined with a symbol matching the associated rule name, and an optional (although strongly encouraged) regular expression used to match the head of the input stream. 
+ # + # The result of the terminal block is the semantic value of that terminal, which if often a string, but may be any instance which reflects the semantic interpretation of that terminal. + # + # The `value` parameter is the value matched by the regexp, if defined, or by the sub-terminal rules otherwise. + # + # The `prod` parameter is the name of the parent rule for which this terminal is matched, which may have a bearing in some circumstances, although not used in this example. + # + # If no block is provided, then the value which would have been passed to the block is used as the result directly. + + # Match the Left hand side of a rule or terminal + # + # [11] LHS ::= ('[' SYMBOL+ ']' ' '+)? SYMBOL ' '* '::=' + terminal(:LHS, LHS) do |value, prod| + value.to_s.scan(/(?:\[([^\]]+)\])?\s*(\w+)\s*::=/).first end - - ## - # Parse a rule into an optional rule number, a symbol and an expression - # - # @param [String] rule - # @return [Rule] - def ruleParts(rule) - num_sym, expr = rule.split('::=', 2).map(&:strip) - num, sym = num_sym.split(']', 2).map(&:strip) - num, sym = "", num if sym.nil? - num = num[1..-1] - r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self) - debug("ruleParts") { r.inspect } - r + + # Match `SYMBOL` terminal + # + # [12] SYMBOL ::= ([a-z] | [A-Z] | [0-9] | '_' | '.')+ + terminal(:SYMBOL, SYMBOL) do |value| + value.to_sym end - ## - # Parse a string into an expression tree and a remaining string - # - # @example - # >>> expression("a b c") - # ((seq a b c) '') - # - # >>> expression("a? b+ c*") - # ((seq (opt a) (plus b) (star c)) '') - # - # >>> expression(" | x xlist") - # ((alt (seq) (seq x xlist)) '') - # - # >>> expression("a | (b - c)") - # ((alt a (diff b c)) '') - # - # >>> expression("a b | c d") - # ((alt (seq a b) (seq c d)) '') - # - # >>> expression("a | b | c") - # ((alt a b c) '') - # - # >>> expression("a) b c") - # (a ' b c') - # - # >>> expression("BaseDecl? 
PrefixDecl*") - # ((seq (opt BaseDecl) (star PrefixDecl)) '') - # - # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]") - # ((alt NCCHAR1 diff - # (range '0-9') - # (hex '#x00B7') - # (range '#x0300-#x036F') - # (range, '#x203F-#x2040')) '') - # - # @param [String] s - # @return [Array] - def expression(s) - debug("expression") {"(#{s.inspect})"} - e, s = depth {alt(s)} - debug {"=> alt returned #{[e, s].inspect}"} - unless s.to_s.empty? - t, ss = depth {terminal(s)} - debug {"=> terminal returned #{[t, ss].inspect}"} - return [e, ss] if t.is_a?(Array) && t.first == :")" - end - [e, s] + # Match `HEX` terminal + # + # [13] HEX ::= #x' ([a-f] | [A-F] | [0-9])+ + terminal(:HEX, HEX) do |value| + [:hex, value] end - - ## - # Parse alt - # >>> alt("a | b | c") - # ((alt a b c) '') - # @param [String] s - # @return [Array] - def alt(s) - debug("alt") {"(#{s.inspect})"} - args = [] - while !s.to_s.empty? - e, s = depth {seq(s)} - debug {"=> seq returned #{[e, s].inspect}"} - if e.to_s.empty? - break unless args.empty? - e = [:seq, []] # empty sequence - end - args << e - unless s.to_s.empty? - t, ss = depth {terminal(s)} - break unless t[0] == :alt - s = ss - end - end - args.length > 1 ? [args.unshift(:alt), s] : [e, s] + + # Terminal for `ENUM` is matched as part of a `primary` rule. + # + # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS + terminal(:ENUM, ENUM) do |value| + [:range, value[1..-2]] end - - ## - # parse seq - # - # >>> seq("a b c") - # ((seq a b c) '') - # - # >>> seq("a b? c") - # ((seq a (opt b) c) '') - def seq(s) - debug("seq") {"(#{s.inspect})"} - args = [] - while !s.to_s.empty? - e, ss = depth {diff(s)} - debug {"=> diff returned #{[e, ss].inspect}"} - unless e.to_s.empty? - args << e - s = ss - else - break; - end - end - if args.length > 1 - [args.unshift(:seq), s] - elsif args.length == 1 - args + [s] + + # Terminal for `O_ENUM` is matched as part of a `primary` rule. 
+ # + # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' + terminal(:O_ENUM, O_ENUM) do |value| + [:range, value[1..-2]] + end + + # Terminal for `RANGE` is matched as part of a `primary` rule. + # + # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + terminal(:RANGE, RANGE) do |value| + [:range, value[1..-2]] + end + + # Terminal for `O_RANGE` is matched as part of a `primary` rule. + # + # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + terminal(:O_RANGE, O_RANGE) do |value| + [:range, value[1..-2]] + end + + # Match double quote string + # + # [18] STRING1 ::= '"' (CHAR - '"')* '"' + terminal(:STRING1, STRING1) do |value| + value[1..-2] + end + + # Match single quote string + # + # [19] STRING2 ::= "'" (CHAR - "'")* "'" + terminal(:STRING2, STRING2) do |value| + value[1..-2] + end + + # The `CHAR` and `R_CHAR` productions are not used explicitly + + # Match `POSTFIX` terminal + # + # [22] POSTFIX ::= [?*+] + terminal(:POSTFIX, POSTFIX) + + # The `PASS` productions is not used explicitly + + # ## Non-terminal productions + # Define productions for non-Termainals. This can include `start_production` as well as `production` to hook into rule start and end. In some cases, we need to use sub-productions as generated when turning EBNF into PEG. + # + # Productions are defined with a symbol matching the associated rule name. + # + # The result of the productions is typically the abstract syntax tree matched by the rule, so far, but could be a specific semantic value, or could be ignored with the result being returned via the `callback`. + # + # The `value` parameter is the result returned from child productions + # + # The `data` parameter other data which may be returned by child productions placing information onto their input (unused in this example). + # + # The `callback` parameter provides access to a callback defined in the call to `parse`). + + # Production for end of `declaration` non-terminal. 
+ # + # Look for `@terminals` to change parser state to parsing terminals. + # + # Clears the packrat parser when called. + # + # `@pass` is ignored here. + # + # [2] declaration ::= '@terminals' | pass + production(:declaration, clear_packrat: true) do |value, data, callback| + # value contains a declaration. + # Invoke callback + callback.call(:terminal) if value == '@terminals' + nil + end + + # Production for end of `rule` non-terminal. + # + # By setting `as_hash: true` in the `start_production`, the `value` parameter will be in the form `{LHS: "v", expression: "v"}`. Otherwise, it would be expressed using an array of hashes of the form `[{LHS: "v"}, {expression: "v"}]`. + # + # Clears the packrat parser when called. + # + # Create rule from expression value and pass to callback + # + # [3] rule ::= LHS expression + start_production(:rule, as_hash: true) + production(:rule, clear_packrat: true) do |value, data, callback| + # value contains an expression. + # Invoke callback + id, sym = value[:LHS] + expression = value[:expression] + callback.call(:rule, EBNF::Rule.new(sym.to_sym, id, expression)) + nil + end + + # Production for end of `expression` non-terminal. + # Passes through the optimized value of the alt production as follows: + # + # The `value` parameter, is of the form `[{alt: "v"}]`. + # + # [:alt foo] => foo + # [:alt foo bar] => [:alt foo bar] + # + # [4] expression ::= alt + production(:expression) do |value| + value.first[:alt] + end + + # Production for end of `alt` non-terminal. + # Passes through the optimized value of the seq production as follows: + # + # The `value` parameter, is of the form `{seq: "v", _alt_1: "v"}`. 
+ # + # [:seq foo] => foo + # [:seq foo bar] => [:seq foo bar] + # + # Note that this also may just pass through from `_alt_1` + # + # [5] alt ::= seq ('|' seq)* + start_production(:alt, as_hash: true) + production(:alt) do |value| + if value[:_alt_1].length > 0 + [:alt, value[:seq]] + value[:_alt_1] else - ["", s] + value[:seq] end end - - ## - # parse diff - # - # >>> diff("a - b") - # ((diff a b) '') - def diff(s) - debug("diff") {"(#{s.inspect})"} - e1, s = depth {postfix(s)} - debug {"=> postfix returned #{[e1, s].inspect}"} - unless e1.to_s.empty? - unless s.to_s.empty? - t, ss = depth {terminal(s)} - debug {"diff #{[t, ss].inspect}"} - if t.is_a?(Array) && t.first == :diff - s = ss - e2, s = primary(s) - unless e2.to_s.empty? - return [[:diff, e1, e2], s] - else - error("diff", "Syntax Error") - raise SyntaxError, "diff missing second operand" - end - end - end - end - [e1, s] + + # Production for end of `_alt_1` non-terminal. + # Used to collect the `('|' seq)*` portion of the `alt` non-terminal: + # + # The `value` parameter, is of the form `[{seq: ["v"]}]`. + # + # [5] _alt_1 ::= ('|' seq)* + production(:_alt_1) do |value| + value.map {|a1| a1.last[:seq]}.compact # Get rid of '|' end - - ## - # parse postfix - # - # >>> postfix("a b c") - # (a ' b c') - # - # >>> postfix("a? b c") - # ((opt, a) ' b c') - def postfix(s) - debug("postfix") {"(#{s.inspect})"} - e, s = depth {primary(s)} - debug {"=> primary returned #{[e, s].inspect}"} - return ["", s] if e.to_s.empty? - if !s.to_s.empty? - t, ss = depth {terminal(s)} - debug {"=> #{[t, ss].inspect}"} - if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first) - return [[t.first, e], ss] - end - end - [e, s] + + # Production for end of `seq` non-terminal. + # Passes through the optimized value of the `diff` production as follows: + # + # The `value` parameter, is an array of values, which cannot be empty. 
+ # + # [:diff foo] => foo + # [:diff foo bar] => [:diff foo bar] + # + # Note that this also may just pass through from `_seq_1` + # + # [6] seq ::= diff+ + production(:seq) do |value| + value.length == 1 ? value.first : ([:seq] + value) end - ## - # parse primary - # - # >>> primary("a b c") - # (a ' b c') - def primary(s) - debug("primary") {"(#{s.inspect})"} - t, s = depth {terminal(s)} - debug {"=> terminal returned #{[t, s].inspect}"} - if t.is_a?(Symbol) || t.is_a?(String) - [t, s] - elsif %w(range hex).map(&:to_sym).include?(t.first) - [t, s] - elsif t.first == :"(" - e, s = depth {expression(s)} - debug {"=> expression returned #{[e, s].inspect}"} - [e, s] + # `Diff` production returns concatenated postfix values + # + # The `value` parameter, is of the form `{postfix: "v", _diff_1: "v"}`. + # + # [7] diff ::= postfix ('-' postfix)? + start_production(:diff, as_hash: true) + production(:diff) do |value| + if value[:_diff_1] + [:diff, value[:postfix], value[:_diff_1]] else - ["", s] + value[:postfix] end end - - ## - # parse one terminal; return the terminal and the remaining string - # - # A terminal is represented as a tuple whose 1st item gives the type; - # some types have additional info in the tuple. 
- # - # @example - # >>> terminal("'abc' def") - # ('abc' ' def') - # - # >>> terminal("[0-9]") - # ((range '0-9') '') - # >>> terminal("#x00B7") - # ((hex '#x00B7') '') - # >>> terminal ("\[#x0300-#x036F\]") - # ((range '#x0300-#x036F') '') - # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]") - # ((range "^<>'{}|^`") '-\[#x00-#x20\]') - def terminal(s) - s = s.strip - #STDERR.puts s.inspect - case m = s[0,1] - when '"', "'" # STRING1 or STRING2 - l, s = s[1..-1].split(m.rstrip, 2) - [LL1::Lexer.unescape_string(l), s] - when '[' # RANGE, O_RANGE - l, s = s[1..-1].split(/(?<=[^\\])\]/, 2) - [[:range, LL1::Lexer.unescape_string(l)], s] - when '#' # HEX - s.match(/(#x\h+)(.*)$/) - l, s = $1, $2 - [[:hex, l], s] - when /[\w\.]/ # SYMBOL - s.match(/([\w\.]+)(.*)$/) - l, s = $1, $2 - [l.to_sym, s] - when '-' - [[:diff], s[1..-1]] - when '?' - [[:opt], s[1..-1]] - when '|' - [[:alt], s[1..-1]] - when '+' - [[:plus], s[1..-1]] - when '*' - [[:star], s[1..-1]] - when /[\(\)]/ # '(' or ')' - [[m.to_sym], s[1..-1]] - else - error("terminal", "unrecognized terminal: #{s.inspect}") - raise SyntaxError, "unrecognized terminal: #{s.inspect}" + + production(:_diff_1) do |value| + value.last[:postfix] if value + end + + # Production for end of `postfix` non-terminal. + # Either returns the `primary` production value, or as modified by the `postfix`. + # + # The `value` parameter, is of the form `{primary: "v", _postfix_1: "v"}`. + # + # [:primary] => [:primary] + # [:primary, '*'] => [:star, :primary] + # [:primary, '+'] => [:plus, :primary] + # [:primary, '?'] => [:opt, :primary] + # + # [8] postfix ::= primary POSTFIX? + start_production(:postfix, as_hash: true) + production(:postfix) do |value| + # Push result onto input stack, as the `diff` production can have some number of `postfix` values that are applied recursively + case value[:_postfix_1] + when "*" then [:star, value[:primary]] + when "+" then [:plus, value[:primary]] + when "?" 
then [:opt, value[:primary]] + else value[:primary] + end + end + + # Production for end of `primary` non-terminal. + # Places `:primary` on the stack + # + # The `value` parameter, is either a string (for a terminal) or an array of the form `['(': '(', expression: "v", ')', ')']`. + # + # This may either be a terminal, or the result of an `expression`. + # + # [9] primary ::= HEX + # | SYMBOL + # | RANGE + # | ENUM + # | O_RANGE + # | O_ENUM + # | STRING1 + # | STRING2 + # | '(' expression ')' + production(:primary) do |value| + Array(value).length > 2 ? value[1][:expression] : value + end + + # Production for end of pass non-terminal. + # + # [10] pass ::= '@pass' expression + production(:pass) do |value, data, callback| + # Invoke callback + callback.call(:pass, value.last[:expression]) + end + + # ## Parser invocation. + # On start, yield ourselves if a block is given, otherwise, return this parser instance + # + # @param [#read, #to_s] input + # @param [Hash{Symbol => Object}] options + # @option options [Boolean] :level + # Trace level. 0(debug), 1(info), 2(warn), 3(error). + # @return [EBNFParser] + def initialize(input, **options, &block) + # If the `level` option is set, instantiate a logger for collecting trace information. + if options.has_key?(:level) + options[:logger] = Logger.new(STDERR) + options[:logger].level = options[:level] + options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"} + end + + # Read input, if necessary, which will be used in a Scanner. + @input = input.respond_to?(:read) ? input.read : input.to_s + + parsing_terminals = false + @ast = [] + parse(@input, :ebnf, EBNFMeta::RULES, + # Use an optimized Regexp for whitespace + whitespace: EBNF::Terminals::PASS, + **options + ) do |context, *data| + rule = case context + when :terminal + # After parsing `@terminals` + # This changes the state of the parser to treat subsequent rules as terminals. 
+ parsing_terminals = true + next + when :pass + # After parsing `@pass` + # This defines a specific rule for whitespace. + rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass) + when :rule + # A rule which has already been turned into a `Rule` object. + rule = data.first + rule.kind = :terminal if parsing_terminals + rule + end + @ast << rule if rule end + rescue EBNF::PEG::Parser::Error => e + raise SyntaxError, e.message end end end \ No newline at end of file diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 295fecd..9f607bc 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -15,5 +15,10 @@ module EBNF::Terminals STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze POSTFIX = %r([?*+])u.freeze - PASS = %r((\s|(?:(#[^x]|//)[^\n\r]*$)|(?:/\*(?:(?:\*[^/])|[^*])*\*/))+)mu.freeze + PASS = %r(( + \s + | (?:(?:\#[^x]|//)[^\n\r]*) + | (?:/\*(?:(?:\*[^/])|[^*])*\*/) + | (?:\(\*(?:(?:\*[^\)])|[^*])*\*\)) + )+)xmu.freeze end diff --git a/spec/abnf_spec.rb b/spec/abnf_spec.rb index 115bcd6..c10f883 100644 --- a/spec/abnf_spec.rb +++ b/spec/abnf_spec.rb @@ -228,7 +228,7 @@ "illegal hex range": "hex = %x22-4060-80\n", }.each do |title, input| it title do - expect {parse(input)}.to raise_error(EBNF::PEG::Parser::Error) + expect {parse(input)}.to raise_error(SyntaxError) end end end diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 22d9175..2e5b6e7 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -14,8 +14,8 @@ %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}, %{ @terminals - [3] terminal ::= [A-Z_]+ - } => %{((terminal terminal "3" (plus (range "A-Z_"))))}, + [3] terminal ::= [A-Z]+ + } => %{((terminal terminal "3" (plus (range "A-Z"))))}, %{ [9] primary ::= HEX | RANGE @@ -33,8 +33,8 @@ %{((rule SolutionModifier "18" (seq _SolutionModifier_1 _SolutionModifier_2)))}, %{[18.1] 
_SolutionModifier_1 ::= _empty | GroupClause} => %{((rule _SolutionModifier_1 "18.1" (alt _empty GroupClause)))}, - %q{[18] STRING1 ::= '"' (CHAR | [\t\'\[\]\(\)\-])* '"'} => - %q{((terminal STRING1 "18" (seq "\"" (star (alt CHAR (range "\t'[]()-"))) "\"")))}, + %q{[18] STRING1 ::= '"' (CHAR - '"')* '"'} => + %q{((terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")))}, %q{[161s] WS ::= #x20 | #x9 | #xD | #xA} => %q{((terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))))}, %q{[1] shexDoc ::= directive* # leading CODE} => @@ -47,7 +47,7 @@ %q{((rule shexDoc "1" (star directive)))}, %q{[1] shexDoc ::= /* leading CODE */ directive*} => %q{((rule shexDoc "1" (star directive)))}, - %q{[1] shexDoc (* leading CODE *) ::= directive*} => + %q{[1] shexDoc ::= (* leading CODE *) directive*} => %q{((rule shexDoc "1" (star directive)))}, %q{[156s] STRING_LITERAL1 ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */} => %q{((terminal STRING_LITERAL1 "156s" diff --git a/spec/ebnf_spec.rb b/spec/ebnf_spec.rb index c12cf75..e5d1877 100644 --- a/spec/ebnf_spec.rb +++ b/spec/ebnf_spec.rb @@ -11,8 +11,8 @@ %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))}, %{ @terminals - [3] terminal ::= [A-Z_]+ - } => %{((terminal terminal "3" (plus (range "A-Z_"))))}, + [3] terminal ::= [A-Z]+ + } => %{((terminal terminal "3" (plus (range "A-Z"))))}, %{ [9] primary ::= HEX | RANGE diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index a2e8d8f..28a9a10 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -4,111 +4,141 @@ require 'ebnf' require 'sxp' -describe EBNF::Base do - describe "#ruleParts" do +describe EBNF::Parser do + let(:logger) {RDF::Spec.logger} + after(:each) do |example| + puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) + end + + context "rule variations" do { - %{[2] Prolog ::= BaseDecl? 
PrefixDecl*} => - %{(rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl)))}, - %{[2] declaration ::= '@terminals' | '@pass'} => - %{(rule declaration "2" (alt "@terminals" "@pass"))}, - %{[9] postfix ::= primary ( [?*+] )?} => - %{(rule postfix "9" (seq primary (opt (range "?*+"))))}, - %{[18] STRING2 ::= "'" (CHAR - "'")* "'"} => - %{(terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'"))}, - %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => - %{(terminal IRIREF "18" (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))}, - #%{[xx]minimal::=whitespace[yy]whitespace::=PASS} => - # %{(rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl)))}, - }.each do |input, expected| - it "given #{input.inspect} produces #{expected}" do - expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) + "legal rule name": [ + 'rulename ::= "foo"', + %{((rule rulename (seq "foo")))} + ], + "prolog": [ + %{[2] Prolog ::= BaseDecl? PrefixDecl*}, + %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))} + ], + "aliteration": [ + %{[2] declaration ::= '@terminals' | '@pass'}, + %{((rule declaration "2" (alt "@terminals" "@pass")))}, + ], + "posfix": [ + %{[9] postfix ::= primary ( [?*+] )?}, + %{((rule postfix "9" (seq primary (opt (range "?*+")))))}, + ], + "diff": [ + %{[18] STRING2 ::= "'" (CHAR - "'")* "'"}, + %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))}, + ], + "IRIREF": [ + %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), + %{((terminal IRIREF "18" + (seq "<" + (star + (alt + (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) + UCHAR)) + ">")))}, + ], + "minimal whitespace": [ + %{[xx]minimal::=whitespace[yy]whitespace::=PASS}, + %{((rule minimal "xx" (seq whitespace (range "yy"))) + (rule whitespace (seq PASS)))} + ] + }.each do |title, (input, expect)| + it title do + expect(parse(input).to_sxp).to produce(expect, logger) end end context "without rule identifiers" do { - %{Prolog 
::= BaseDecl? PrefixDecl*} => - %{(rule Prolog (seq (opt BaseDecl) (star PrefixDecl)))}, - %{declaration ::= '@terminals' | '@pass'} => - %{(rule declaration (alt "@terminals" "@pass"))}, - %{postfix ::= primary ( [?*+] )?} => - %{(rule postfix (seq primary (opt (range "?*+"))))}, - %{STRING2 ::= "'" (CHAR - "'")* "'"} => - %{(terminal STRING2 (seq "'" (star (diff CHAR "'")) "'"))}, - %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => - %{(terminal IRIREF (seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">"))} - }.each do |input, expected| - it "given #{input.inspect} produces #{expected}" do - expect(ebnf(:ruleParts, input).to_sxp).to produce(expected, @debug) + "prolog": [ + %{Prolog ::= BaseDecl? PrefixDecl*}, + %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))} + ], + "aliteration": [ + %{declaration ::= '@terminals' | '@pass'}, + %{((rule declaration (alt "@terminals" "@pass")))}, + ], + "posfix": [ + %{postfix ::= primary ( [?*+] )?}, + %{((rule postfix (seq primary (opt (range "?*+")))))}, + ], + "diff": [ + %{STRING2 ::= "'" (CHAR - "'")* "'"}, + %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))}, + ], + "IRIREF": [ + %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), + %{((terminal IRIREF + (seq "<" + (star + (alt + (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) + UCHAR)) + ">")))}, + ], + }.each do |title, (input, expect)| + it title do + expect(parse(input).to_sxp).to produce(expect, logger) end end end end - + describe "#expression" do { - "'abc' def" => %{((seq "abc" def) "")}, - %{[0-9]} => %{((range "0-9") "")}, - %{#x00B7} => %{((hex "#x00B7") "")}, - %{[#x0300-#x036F]} => %{((range "#x0300-#x036F") "")}, - %{[^<>'{}|^`]-[#x00-#x20]} => %{((diff (range "^<>'{}|^`") (range "#x00-#x20")) "")}, - %{a b c} => %{((seq a b c) "")}, - %{a? b c} => %{((seq (opt a) b c) "")}, - %(a - b) => %{((diff a b) "")}, - %((a - b) - c) => %{((diff (diff a b) c) "")}, - %(a b? 
c) => %{((seq a (opt b) c) "")}, - %(a | b | c) => %{((alt a b c) "")}, - %(a? b+ c*) => %{((seq (opt a) (plus b) (star c)) "")}, - %( | x xlist) => %{((alt (seq ()) (seq x xlist)) "")}, - %(a | (b - c)) => %{((alt a (diff b c)) "")}, - %(a b | c d) => %{((alt (seq a b) (seq c d)) "")}, - %{a) b c} => %{(a " b c")}, - %(BaseDecl? PrefixDecl*) => %{((seq (opt BaseDecl) (star PrefixDecl)) "")}, - %(NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]) => - %{((alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040")) "")}, - %('<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>') => - %{((seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">") "")} + "'abc' def" => %{(seq "abc" def)}, + %{[0-9]} => %{(range "0-9")}, + %{#x00B7} => %{(hex "#x00B7")}, + %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")}, + %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))}, + %{a b c} => %{(seq a b c)}, + %{a? b c} => %{(seq (opt a) b c)}, + %{a - b} => %{(diff a b)}, + %{(a - b) - c} => %{(diff (diff a b) c)}, + %{a b? c} => %{(seq a (opt b) c)}, + %{a | b | c} => %{(alt a b c)}, + %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))}, + %{foo | x xlist} => %{(alt foo (seq x xlist))}, + %{a | (b - c)} => %{(alt a (diff b c))}, + %{a b | c d} => %{(alt (seq a b) (seq c d))}, + %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, + %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => + %{(alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))}, + %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} => + %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")} }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do - expect(ebnf(:expression, input).to_sxp).to produce(expected, @debug) + rule = parse("rule ::= #{input}").ast.first + expect(rule.expr.to_sxp).to produce(expected, @debug) end end end - describe "#diff" do + context "illegal syntax" do { - %{'abc' def} => %{("abc" " def")}, - %{[0-9]} => %{((range "0-9") "")}, - %{#x00B7} => %{((hex "#x00B7") "")}, - %{[#x0300-#x036F]} => %{((range "#x0300-#x036F") "")}, - %{[^<>'{}|^`]-[#x00-#x20]} => %{((diff (range "^<>'{}|^`") (range "#x00-#x20")) "")}, - %{a b c} => %{(a " b c")}, - %{a? b c} => %{((opt a) " b c")}, - %{( [?*+] )?} => %{((opt (range "?*+")) "")}, - %(a - b) => %{((diff a b) "")}, - }.each do |input, expected| - it "given #{input.inspect} produces #{expected}" do - expect(ebnf(:diff, input).to_sxp).to produce(expected, @debug) + "illegal rule name": %{$rule.name ::= foo}, + "diff missing second operand": %{rule ::= a -}, + "unrecognized terminal" => %{rule ::= %foo%}, + "unopened paren" => %{rule ::= a) b c} + }.each do |title, input| + it title do + expect {parse(input)}.to raise_error(SyntaxError) end end end - describe "errors" do - { - %(a - '') => /diff missing second operand/, - %(%foo%) => /unrecognized terminal/, - }.each do |input, expected| - it "given #{input.inspect} raises #{expected}" do - expect do - expect {ebnf(:expression, input)}.to raise_error(SyntaxError, expected) - end.to write(:something).to(:error) - end - end + it "parses EBNF grammar" do + gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) + expect(gram).to 
be_valid end - def ebnf(method, value, **options) + def parse(input, **options) @debug = [] - options = {debug: @debug}.merge(options) - EBNF::Base.new("", **options).send(method, value) + EBNF.parse(input, debug: @debug, format: :ebnf, **options) end end diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 792c6e5..e63beac 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -972,53 +972,51 @@ { "missing rule": [ "a ::= b", - "No rule found for b" + /In rule a: No rule found for b/ ], "illegal string": [ %{a ::= "\u{01}"}, - 'String must be of the form CHAR*' + /syntax error/ ], "empty range": [ "a ::= []", - /Range must be of form HEX\+ or R_CHAR\+/ + /syntax error/ ], "mixed enum char and hex": [ "a ::= [b#x20]", - %(Range must be of form HEX+ or R_CHAR+: was "b#x20") + %(In rule a: Range must be of form HEX+ or R_CHAR+: was "b#x20") ], "mixed enum char and hex (2)": [ "a ::= [#x20z]", - %(Range must be of form HEX+ or R_CHAR+: was "#x20z") + %(In rule a: Range must be of form HEX+ or R_CHAR+: was "#x20z") ], "mixed range char and hex": [ "a ::= [b-#x20]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "b-#x20") + /syntax error/ ], "mixed range char and hex (2)": [ "a ::= [#x20-b]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "#x20-b") + /syntax error/ ], "incomplete range": [ "a ::= [a-]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-") + /syntax error/ ], "incomplete range (2)": [ "a ::= [-b]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "-b") + /syntax error/ ], "extra range": [ "a ::= [a-b-c]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-b-c") + /syntax error/ ], "extra range (2)": [ "a ::= [a-zA-Z]", - %(Range must be of form HEX-HEX or R_CHAR-R_CHAR: was "a-zA-Z") + /syntax error/ ], }.each do |name, (rule, message)| it name do - gram = EBNF.parse(rule) - rule = gram.ast.first - expect {rule.validate!(gram.ast)}.to raise_error SyntaxError, message + expect {EBNF.parse(rule, validate: 
true)}.to raise_error SyntaxError, message end end From 18f54faa0c8f4f63a4534c6f8d6e41f13449821d Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 7 Jul 2020 16:19:36 -0700 Subject: [PATCH 29/50] Revert "Update RANGE and O_RANGE to allow just a single range, not multiple ranges in the same expression." This reverts commit 07cf933f85dc0d025aae39e03c8bda8fdec42b8b. --- etc/abnf.ebnf | 6 +- etc/ebnf.ebnf | 4 +- etc/ebnf.html | 4 +- etc/ebnf.ll1.sxp | 131 +++++++++++---------- etc/ebnf.peg.rb | 14 ++- etc/ebnf.peg.sxp | 40 ++++--- etc/ebnf.sxp | 5 +- etc/iso-ebnf.ebnf | 2 +- etc/iso-ebnf.isoebnf | 4 +- etc/sparql.sxp | 15 ++- etc/turtle.sxp | 47 ++++---- examples/abnf/README.md | 56 ++++----- examples/abnf/abnf.ebnf | 6 +- examples/abnf/abnf.peg.sxp | 15 +-- examples/abnf/abnf.sxp | 9 +- examples/abnf/doc/parser.html | 57 +++------ examples/abnf/meta.rb | 15 +-- examples/ebnf-ll1-parser/README.md | 9 +- examples/ebnf-peg-parser/README.md | 9 +- examples/ebnf-peg-parser/meta.rb | 18 +-- examples/isoebnf/README.md | 63 ++++------ examples/isoebnf/examples/iso-ebnf.isoebnf | 4 +- lib/ebnf/terminals.rb | 4 +- lib/ebnf/writer.rb | 66 ++++++++--- spec/parser_spec.rb | 7 +- spec/rule_spec.rb | 4 - 26 files changed, 311 insertions(+), 303 deletions(-) diff --git a/etc/abnf.ebnf b/etc/abnf.ebnf index c6e91e8..6e8d708 100644 --- a/etc/abnf.ebnf +++ b/etc/abnf.ebnf @@ -49,7 +49,7 @@ c_nl ::= COMMENT | CRLF comment ::= ";" (WSP | VCHAR)* CRLF -quoted_string::= DQUOTE ([#x20-#x21] | [#x23-#x7E])* DQUOTE +quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE # quoted string of SP and VCHAR # without DQUOTE @@ -64,14 +64,14 @@ dec_val ::= "d" DIGIT+ hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))? 
-prose_val ::= "<" ([#x20-#x3D] | [#x3F-#x7E])* ">" +prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" # bracketed string of SP and VCHAR # without angles # prose description, to be used as # last resort # Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A] | [#x61-#x7A] # A-Z | a-z +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z BIT ::= '0' | '1' diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 70baf33..19d2824 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -42,9 +42,9 @@ [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX)) ']' + [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' - [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX)) ']' + [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' # Strings are unescaped Unicode, excepting control characters and hash (#) [18] STRING1 ::= '"' (CHAR - '"')* '"' diff --git a/etc/ebnf.html b/etc/ebnf.html index 7ff0734..1bf3054 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -133,7 +133,7 @@ RANGE ::= -"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX)) "]" +"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" @@ -141,7 +141,7 @@ O_RANGE ::= -"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX)) "]" +"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index d0fa4ef..dcbaed2 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -6,25 +6,20 @@ (follow _eof) (cleanup star) (alt _empty _ebnf_2)) - (rule _ebnf_1 "1.1" - (first "@pass" "@terminals" LHS) - (follow "@pass" "@terminals" LHS _eof) - (alt declaration rule)) (rule _ebnf_2 "1.2" (first "@pass" "@terminals" LHS) (follow _eof) (cleanup merge) (seq _ebnf_1 ebnf)) - (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf)) + (rule _ebnf_1 "1.1" + (first "@pass" "@terminals" LHS) + (follow "@pass" "@terminals" LHS _eof) + (alt declaration rule)) (rule declaration "2" (first "@pass" "@terminals") (follow "@pass" "@terminals" LHS _eof) (alt 
"@terminals" pass)) (rule rule "3" (first LHS) (follow "@pass" "@terminals" LHS _eof) (seq LHS expression)) - (rule _rule_1 "3.1" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "@pass" "@terminals" LHS _eof) - (seq expression)) (rule expression "4" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof) @@ -38,28 +33,15 @@ (follow ")" "@pass" "@terminals" LHS _eof) (cleanup star) (alt _empty _alt_3)) - (rule _alt_2 "5.2" - (first "|") - (follow ")" "@pass" "@terminals" LHS _eof "|") - (seq "|" seq)) (rule _alt_3 "5.3" (first "|") (follow ")" "@pass" "@terminals" LHS _eof) (cleanup merge) (seq _alt_2 _alt_1)) - (rule _alt_4 "5.4" - (first _eps "|") - (follow ")" "@pass" "@terminals" LHS _eof) - (seq _alt_1)) - (rule _alt_5 "5.5" - (first _eps "|") - (follow ")" "@pass" "@terminals" LHS _eof) - (seq _alt_1)) - (rule _alt_6 "5.6" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (rule _alt_2 "5.2" + (first "|") (follow ")" "@pass" "@terminals" LHS _eof "|") - (seq seq)) - (pass _pass (seq PASS)) + (seq "|" seq)) (rule seq "6" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") @@ -75,14 +57,6 @@ (follow ")" "@pass" "@terminals" LHS _eof "|") (cleanup merge) (seq diff _seq_1)) - (rule _seq_3 "6.3" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) - (follow ")" "@pass" "@terminals" LHS _eof "|") - (seq _seq_1)) - (rule _seq_4 "6.4" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) - (follow ")" "@pass" "@terminals" LHS _eof "|") - (seq _seq_1)) (rule diff "7" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE @@ -99,16 +73,6 @@ (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eof "|" ) (seq "-" postfix)) - (rule _diff_3 "7.3" - (first "-" 
_eps) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) - (seq _diff_1)) - (rule _diff_4 "7.4" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) - (seq postfix)) (rule postfix "8" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE @@ -120,11 +84,6 @@ STRING1 STRING2 SYMBOL _eof "|" ) (cleanup opt) (alt _empty POSTFIX)) - (rule _postfix_2 "8.2" - (first POSTFIX _eps) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) - (seq _postfix_1)) (rule primary "9" (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX @@ -135,31 +94,18 @@ (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX RANGE STRING1 STRING2 SYMBOL _eof "|" ) (seq "(" expression ")")) - (rule _primary_2 "9.2" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) - (seq expression ")")) - (rule _primary_3 "9.3" - (first ")") - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) - (seq ")")) (rule pass "10" (first "@pass") (follow "@pass" "@terminals" LHS _eof) (seq "@pass" expression)) - (rule _pass_1 "10.1" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "@pass" "@terminals" LHS _eof) - (seq expression)) (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") 
(range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) - (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" @@ -176,4 +122,59 @@ (range "#x9#xA#xD#x20") (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") - (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) + (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) + (pass _pass (seq PASS)) + (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf)) + (rule _rule_1 "3.1" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "@pass" "@terminals" LHS _eof) + (seq expression)) + (rule _alt_4 "5.4" + (first _eps "|") + (follow ")" "@pass" "@terminals" LHS _eof) + (seq _alt_1)) + (rule _alt_5 "5.5" + (first _eps "|") + (follow ")" "@pass" "@terminals" LHS _eof) + (seq _alt_1)) + (rule _alt_6 "5.6" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow ")" "@pass" "@terminals" LHS _eof "|") + (seq seq)) + (rule _seq_3 "6.3" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) + (follow ")" "@pass" "@terminals" LHS _eof "|") + (seq _seq_1)) + (rule _seq_4 "6.4" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) + (follow ")" "@pass" "@terminals" LHS _eof "|") + (seq _seq_1)) + (rule _diff_3 "7.3" + (first "-" _eps) + (follow 
"(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE + STRING1 STRING2 SYMBOL _eof "|" ) + (seq _diff_1)) + (rule _diff_4 "7.4" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE + STRING1 STRING2 SYMBOL _eof "|" ) + (seq postfix)) + (rule _postfix_2 "8.2" + (first POSTFIX _eps) + (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE + STRING1 STRING2 SYMBOL _eof "|" ) + (seq _postfix_1)) + (rule _primary_2 "9.2" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX + RANGE STRING1 STRING2 SYMBOL _eof "|" ) + (seq expression ")")) + (rule _pass_1 "10.1" + (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "@pass" "@terminals" LHS _eof) + (seq expression)) + (rule _primary_3 "9.3" + (first ")") + (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX + RANGE STRING1 STRING2 SYMBOL _eof "|" ) + (seq ")")) ) diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 0c8cda8..ecf7edc 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -47,13 +47,15 @@ module Meta EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, 
:_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, :_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 87df021..6e16b19 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -19,9 +19,9 @@ (rule pass "10" (seq "@pass" expression)) (terminal LHS "11" (seq _LHS_1 SYMBOL _LHS_2 "::=")) (terminal _LHS_1 "11.1" (opt _LHS_3)) - (terminal _LHS_2 "11.2" (star " ")) (terminal _LHS_3 "11.3" (seq "[" SYMBOL "]" _LHS_4)) (terminal _LHS_4 "11.4" (plus " ")) + (terminal _LHS_2 "11.2" (star " ")) (terminal SYMBOL "12" (plus _SYMBOL_1)) (terminal _SYMBOL_1 "12.1" (alt _SYMBOL_2 _SYMBOL_3 _SYMBOL_4 "_" ".")) 
(terminal _SYMBOL_2 "12.2" (range "a-z")) @@ -36,22 +36,24 @@ (terminal ENUM "14" (diff _ENUM_1 LHS)) (terminal _ENUM_1 "14.1" (alt _ENUM_2 _ENUM_3)) (terminal _ENUM_2 "14.2" (seq "[" _ENUM_4)) - (terminal _ENUM_3 "14.3" (seq _ENUM_5 "]")) (terminal _ENUM_4 "14.4" (plus R_CHAR)) + (terminal _ENUM_3 "14.3" (seq _ENUM_5 "]")) (terminal _ENUM_5 "14.5" (plus HEX)) (terminal O_ENUM "15" (alt _O_ENUM_1 _O_ENUM_2)) (terminal _O_ENUM_1 "15.1" (seq "[^" _O_ENUM_3)) - (terminal _O_ENUM_2 "15.2" (seq _O_ENUM_4 "]")) (terminal _O_ENUM_3 "15.3" (plus R_CHAR)) + (terminal _O_ENUM_2 "15.2" (seq _O_ENUM_4 "]")) (terminal _O_ENUM_4 "15.4" (plus HEX)) (terminal RANGE "16" (seq "[" _RANGE_1 "]")) - (terminal _RANGE_1 "16.1" (alt _RANGE_2 _RANGE_3)) - (terminal _RANGE_2 "16.2" (seq R_CHAR "-" R_CHAR)) - (terminal _RANGE_3 "16.3" (seq HEX "-" HEX)) + (terminal _RANGE_1 "16.1" (plus _RANGE_2)) + (terminal _RANGE_2 "16.2" (alt _RANGE_3 _RANGE_4)) + (terminal _RANGE_3 "16.3" (seq R_CHAR "-" R_CHAR)) + (terminal _RANGE_4 "16.4" (seq HEX "-" HEX)) (terminal O_RANGE "17" (seq "[^" _O_RANGE_1 "]")) - (terminal _O_RANGE_1 "17.1" (alt _O_RANGE_2 _O_RANGE_3)) - (terminal _O_RANGE_2 "17.2" (seq R_CHAR "-" R_CHAR)) - (terminal _O_RANGE_3 "17.3" (seq HEX "-" HEX)) + (terminal _O_RANGE_1 "17.1" (plus _O_RANGE_2)) + (terminal _O_RANGE_2 "17.2" (alt _O_RANGE_3 _O_RANGE_4)) + (terminal _O_RANGE_3 "17.3" (seq R_CHAR "-" R_CHAR)) + (terminal _O_RANGE_4 "17.4" (seq HEX "-" HEX)) (terminal STRING1 "18" (seq "\"" _STRING1_1 "\"")) (terminal _STRING1_1 "18.1" (star _STRING1_2)) (terminal _STRING1_2 "18.2" (diff CHAR "\"")) @@ -68,24 +70,24 @@ (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" (plus _PASS_1)) (terminal _PASS_1 "23.1" (alt _PASS_2 _PASS_3 _PASS_4 _PASS_5)) + (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) + (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) + (terminal _PASS_6 "23.6" (alt _PASS_8 "//")) + (terminal _PASS_8 "23.8" (diff "#" "#x")) + (terminal _PASS_7 "23.7" (star 
_PASS_9)) + (terminal _PASS_9 "23.9" (range "^#xA#xD")) + (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) (terminal _PASS_10 "23.10" (star _PASS_11)) (terminal _PASS_11 "23.11" (alt _PASS_12 _PASS_13)) (terminal _PASS_12 "23.12" (opt _PASS_14)) - (terminal _PASS_13 "23.13" (range "^*")) (terminal _PASS_14 "23.14" (seq "*" _PASS_15)) (terminal _PASS_15 "23.15" (range "^/")) + (terminal _PASS_13 "23.13" (range "^*")) + (terminal _PASS_5 "23.5" (seq "(*" _PASS_16 "*)")) (terminal _PASS_16 "23.16" (star _PASS_17)) (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) (terminal _PASS_18 "23.18" (opt _PASS_20)) - (terminal _PASS_19 "23.19" (range "^*")) (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) - (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) (terminal _PASS_21 "23.21" (range "^)")) - (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) - (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) - (terminal _PASS_5 "23.5" (seq "(*" _PASS_16 "*)")) - (terminal _PASS_6 "23.6" (alt _PASS_8 "//")) - (terminal _PASS_7 "23.7" (star _PASS_9)) - (terminal _PASS_8 "23.8" (diff "#" "#x")) - (terminal _PASS_9 "23.9" (range "^#xA#xD")) + (terminal _PASS_19 "23.19" (range "^*")) (pass _pass (seq PASS))) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index fb20766..a2be331 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -15,8 +15,9 @@ (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) - (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) 
"\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" diff --git a/etc/iso-ebnf.ebnf b/etc/iso-ebnf.ebnf index 5d37f91..05d6481 100644 --- a/etc/iso-ebnf.ebnf +++ b/etc/iso-ebnf.ebnf @@ -69,7 +69,7 @@ comment ::= start_comment_symbol comment_symbol* end_comment comment_symbol ::= comment | terminal_string | special_sequence | character -letter ::= [a-z] | [A-Z] +letter ::= [a-zA-Z] decimal_digit ::= [0-9] # Extended to allow '_' diff --git a/etc/iso-ebnf.isoebnf b/etc/iso-ebnf.isoebnf index 8bcda08..90084f1 100644 --- a/etc/iso-ebnf.isoebnf +++ b/etc/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols '_' or "_" *); + between the quote symbols ’_’ or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. 
*); -comment = '(*', {comment_symbol}, '*)' +comment = ’(*’, {comment_symbol}, ’*)’ (* A comment is allowed anywhere outside a , , or *); diff --git a/etc/sparql.sxp b/etc/sparql.sxp index ff3a130..19622d5 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -282,7 +282,8 @@ (rule iri "136" (alt IRIREF PrefixedName)) (rule PrefixedName "137" (alt PNAME_LN PNAME_NS)) (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON)) - (terminal IRIREF "139" (seq "<" (star (range "^<>\"{}|^`]-[#x00-#x20")) ">")) + (terminal IRIREF "139" + (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">")) (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "142" @@ -290,7 +291,9 @@ (terminal VAR1 "143" (seq "?" VARNAME)) (terminal VAR2 "144" (seq "$" VARNAME)) (terminal LANGTAG "145" - (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9")))))) + (seq "@" + (plus (alt (range "a-z") (range "A-Z"))) + (star (seq "-" (plus (alt (range "a-z") (range "A-Z") (range "0-9")))))) ) (terminal INTEGER "146" (plus (range "0-9"))) (terminal DECIMAL "147" (seq (star (range "0-9")) "." 
(plus (range "0-9")))) (terminal DOUBLE "148" @@ -304,16 +307,16 @@ (terminal INTEGER_NEGATIVE "152" (seq "-" INTEGER)) (terminal DECIMAL_NEGATIVE "153" (seq "-" DECIMAL)) (terminal DOUBLE_NEGATIVE "154" (seq "-" DOUBLE)) - (terminal EXPONENT "155" (seq (range "eE") (opt (range "+-")) (plus (range "0-9")))) + (terminal EXPONENT "155" (seq (range "eE") (opt (range "#x2b#x2d")) (plus (range "0-9")))) (terminal STRING_LITERAL1 "156" (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR)) "'")) (terminal STRING_LITERAL2 "157" (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\"")) (terminal STRING_LITERAL_LONG1 "158" - (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR ))* \"'''\"")))) + (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''")) (terminal STRING_LITERAL_LONG2 "159" - (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR ))* '\"\"\"'")))) - (terminal ECHAR "160" (seq "\\" (range "tbnrf\"'"))) + (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\"")) + (terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'"))) (terminal NIL "161" (seq "(" (star WS) ")")) (terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))) (terminal ANON "163" (seq "[" (star WS) "]")) diff --git a/etc/turtle.sxp b/etc/turtle.sxp index 8fe9099..b6309fd 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -4,6 +4,8 @@ (rule directive "3" (alt prefixID base sparqlPrefix sparqlBase)) (rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF ".")) (rule base "5" (seq "@base" IRIREF ".")) + (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF)) + (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF)) (rule triples "6" (alt (seq subject predicateObjectList) @@ -19,48 +21,49 @@ (rule blankNodePropertyList "14" (seq "[" predicateObjectList "]")) (rule collection "15" (seq "(" (star object) ")")) (rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE)) + (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq 
"^^" iri))))) + (rule BooleanLiteral "133s" (alt "true" "false")) (rule String "17" (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE )) - (terminal IRIREF "18" (seq "<" (star (alt (range "^<>\"{}|^`]-[#x00-#x20") UCHAR)) ">")) - (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9")))) + (rule iri "135s" (alt IRIREF PrefixedName)) + (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS)) + (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON)) + (terminal IRIREF "18" + (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">")) + (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":")) + (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL)) + (terminal BLANK_NODE_LABEL "141s" + (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS)))) + (terminal LANGTAG "144s" + (seq "@" + (plus (alt (range "a-z") (range "A-Z"))) + (star (seq "-" (plus (alt (range "a-z") (range "A-Z") (range "0-9")))))) ) + (terminal INTEGER "19" (seq (opt (range "#x2b#x2d")) (plus (range "0-9")))) (terminal DECIMAL "20" - (seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9"))))) + (seq (opt (range "#x2b#x2d")) (seq (star (range "0-9")) "." (plus (range "0-9"))))) (terminal DOUBLE "21" (seq - (opt (range "+-")) + (opt (range "#x2b#x2d")) (alt (seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT) (seq "." 
(plus (range "0-9")) EXPONENT) (seq (plus (range "0-9")) EXPONENT)) )) + (terminal EXPONENT "154s" (seq (range "eE") (opt (range "#x2b#x2d")) (plus (range "0-9")))) (terminal STRING_LITERAL_QUOTE "22" (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\"")) (terminal STRING_LITERAL_SINGLE_QUOTE "23" (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'")) (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24" - (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\"")))) + (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''")) (terminal STRING_LITERAL_LONG_QUOTE "25" - (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'")))) + (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\"")) (terminal UCHAR "26" - (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX))) - (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF)) + (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX))) + (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'"))) (terminal SPARQL_PREFIX "28t" (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx"))) (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee"))) - (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF)) - (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri))))) - (rule BooleanLiteral "133s" (alt "true" "false")) - (rule iri "135s" (alt IRIREF PrefixedName)) - (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS)) - (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON)) - (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":")) - (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL)) - (terminal BLANK_NODE_LABEL "141s" - (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS)))) - (terminal LANGTAG "144s" - (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range 
"a-zA-Z0-9")))))) - (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9")))) - (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'"))) (terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))) (terminal ANON "162s" (seq "[" (star WS) "]")) (terminal PN_CHARS_BASE "163s" diff --git a/examples/abnf/README.md b/examples/abnf/README.md index 5faa0d4..f6d7a43 100644 --- a/examples/abnf/README.md +++ b/examples/abnf/README.md @@ -15,49 +15,49 @@ Output rules and terminals as [S-Expression][S-Expression]: This generates a [S-Expression][] form of the grammar suitable for use by {EBNF}. ( - (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) - (rule rule (seq rulename defined_as elements c_nl)) - (rule elements (seq alternation (star c_wsp))) + (rule rulelist (plus (alt rule (seq (star c-wsp) c-nl)))) + (rule rule (seq rulename defined-as elements c-nl)) + (rule rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (rule defined-as (seq (star c-wsp) (alt "=" "=/") (star c-wsp))) + (rule elements (seq alternation (star c-wsp))) + (rule c-wsp (alt WSP (seq c-nl WSP))) + (rule c-nl (alt comment CRLF)) + (rule comment (seq ";" (star (alt WSP VCHAR)) CRLF)) (rule alternation - (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) - (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) + (seq concatenation (star (seq (star c-wsp) "/" (star c-wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c-wsp) repetition)))) (rule repetition (seq (opt repeat) element)) (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) - (rule element (alt rulename group option char_val num_val prose_val)) - (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) - (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) - (rule char_val (alt case_insensitive_string case_sensitive_string)) - (rule case_insensitive_string (seq (opt "%i") quoted_string)) 
- (rule case_sensitive_string (seq "%s" quoted_string)) - (rule num_val (seq "%" (alt bin_val dec_val hex_val))) - (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) - (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) - (terminal c_wsp (alt WSP (seq c_nl WSP))) - (terminal c_nl (alt COMMENT CRLF)) - (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (terminal quoted_string - (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) - (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) - (terminal dec_val + (rule element (alt rulename group option char-val num-val prose-val)) + (rule group (seq "(" (star c-wsp) alternation (star c-wsp) ")")) + (rule option (seq "[" (star c-wsp) alternation (star c-wsp) "]")) + (rule char-val (alt case-insensitive-string case-sensitive-string)) + (rule case-insensitive-string (seq (opt "%i") quoted-string)) + (rule case-sensitive-string (seq "%s" quoted-string)) + (rule quoted-string + (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7e"))) DQUOTE)) + (rule num-val (seq "%" (alt bin-val dec-val hex-val))) + (rule bin-val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (rule dec-val (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) - (terminal hex_val + (rule hex-val (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." 
(plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) - (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) + (rule prose-val (seq "<" (star (alt (range "#x20-#x3d") (range "#x3f-#x7e"))) ">")) + (terminal ALPHA (alt (range "#x41-#x5a") (range "#x61-#x7a"))) (terminal BIT (alt "0" "1")) - (terminal CHAR (range "#x01-#x7F")) + (terminal CHAR (range "#x1-#x7f")) (terminal CR (hex "#x0D")) (terminal CRLF (seq (opt CR) LF)) - (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) + (terminal CTL (alt (range "#x0-#x1f") (hex "#x7F"))) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star (alt WSP (seq CRLF WSP)))) - (terminal OCTET (range "#x00-#xFF")) + (terminal OCTET (range "#x0-#xff")) (terminal SP (hex "#x20")) - (terminal VCHAR (range "#x21-#x7E")) + (terminal VCHAR (range "#x21-#x7e")) (terminal WSP (alt SP HTAB))) This can then be used as input to {EBNF.parse} to transform ABNF to PEG for parsing examples of the grammar using {EBNF::PEG::Parser}. diff --git a/examples/abnf/abnf.ebnf b/examples/abnf/abnf.ebnf index c6e91e8..6e8d708 100644 --- a/examples/abnf/abnf.ebnf +++ b/examples/abnf/abnf.ebnf @@ -49,7 +49,7 @@ c_nl ::= COMMENT | CRLF comment ::= ";" (WSP | VCHAR)* CRLF -quoted_string::= DQUOTE ([#x20-#x21] | [#x23-#x7E])* DQUOTE +quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE # quoted string of SP and VCHAR # without DQUOTE @@ -64,14 +64,14 @@ dec_val ::= "d" DIGIT+ hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))? 
-prose_val ::= "<" ([#x20-#x3D] | [#x3F-#x7E])* ">" +prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" # bracketed string of SP and VCHAR # without angles # prose description, to be used as # last resort # Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A] | [#x61-#x7A] # A-Z | a-z +ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z BIT ::= '0' | '1' diff --git a/examples/abnf/abnf.peg.sxp b/examples/abnf/abnf.peg.sxp index 37fb27c..3373889 100644 --- a/examples/abnf/abnf.peg.sxp +++ b/examples/abnf/abnf.peg.sxp @@ -50,9 +50,7 @@ (rule _comment_2 (alt WSP VCHAR)) (terminal quoted_string (seq DQUOTE _quoted_string_1 DQUOTE)) (rule _quoted_string_1 (star _quoted_string_2)) - (rule _quoted_string_2 (alt _quoted_string_3 _quoted_string_4)) - (terminal _quoted_string_3 (range "#x20-#x21")) - (terminal _quoted_string_4 (range "#x23-#x7E")) + (terminal _quoted_string_2 (range "#x20-#x21#x23-#x7E")) (terminal bin_val (seq "b" _bin_val_1 _bin_val_2)) (rule _bin_val_1 (plus BIT)) (rule _bin_val_2 (opt _bin_val_3)) @@ -82,12 +80,8 @@ (rule _hex_val_8 (plus HEXDIG)) (terminal prose_val (seq "<" _prose_val_1 ">")) (rule _prose_val_1 (star _prose_val_2)) - (rule _prose_val_2 (alt _prose_val_3 _prose_val_4)) - (terminal _prose_val_3 (range "#x20-#x3D")) - (terminal _prose_val_4 (range "#x3F-#x7E")) - (terminal ALPHA (alt _ALPHA_1 _ALPHA_2)) - (terminal _ALPHA_1 (range "#x41-#x5A")) - (terminal _ALPHA_2 (range "#x61-#x7A")) + (terminal _prose_val_2 (range "#x20-#x3D#x3F-#x7E")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) @@ -98,7 +92,8 @@ (terminal _CTL_2 (hex "#x7F")) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) + (terminal HEXDIG (alt DIGIT _HEXDIG_1)) + (terminal _HEXDIG_1 (range "A-F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star _LWSP_1)) diff --git 
a/examples/abnf/abnf.sxp b/examples/abnf/abnf.sxp index 3fd8590..4b96a3e 100644 --- a/examples/abnf/abnf.sxp +++ b/examples/abnf/abnf.sxp @@ -19,15 +19,14 @@ (terminal c_wsp (alt WSP (seq c_nl WSP))) (terminal c_nl (alt COMMENT CRLF)) (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (terminal quoted_string - (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) + (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) (terminal dec_val (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) (terminal hex_val (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) - (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) + (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) @@ -35,7 +34,7 @@ (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) + (terminal HEXDIG (alt DIGIT (range "A-F"))) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star (alt WSP (seq CRLF WSP)))) diff --git a/examples/abnf/doc/parser.html b/examples/abnf/doc/parser.html index c3e79b9..5c26cae 100644 --- a/examples/abnf/doc/parser.html +++ b/examples/abnf/doc/parser.html @@ -667,7 +667,7 @@

EBNF Parser for EBNF.

hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?

-
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
+        
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value|
     if value.include?('.')
@@ -838,7 +838,6 @@

Non-terminal productions

      raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
       parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
     end
-    progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
     sym
   end
@@ -1001,12 +1000,10 @@

Non-terminal productions

-

case_insensitive_string ::= "%i"? quoted_string

+

char_val ::= case_insensitive_string | case_sensitive_string

-
  production(:case_insensitive_string) do |value|
-    str = value.last[:quoted_string]
-    if str.match?(/[[:alpha:]]/)
+
  production(:char_val) do |value|
@@ -1014,13 +1011,10 @@

Non-terminal productions

-

Only need to use case-insensitive if there are alphabetic characters in the string.

+

FIXME: need rule logic for insensitive matching of strings

-
      [:istr, value.last[:quoted_string]]
-    else
-      value.last[:quoted_string]
-    end
+        
    value.last[:quoted_string]
   end
@@ -1029,19 +1023,6 @@

Non-terminal productions

-

case_sensitive_string ::= "%s" quoted_string

- - -
  production(:case_sensitive_string) do |value|
-    value.last[:quoted_string]
-  end
- - - - -
- -

num_val ::= "%" (bin_val | dec_val | hex_val)

@@ -1069,10 +1050,10 @@

Parser invocation.

  def initialize(input, **options, &block)
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -1084,10 +1065,10 @@

Parser invocation.

end
- +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -1097,10 +1078,10 @@

Parser invocation.

@parsed_rules = {}
- +
- +

Parses into @parsed_rules

@@ -1113,10 +1094,10 @@

Parser invocation.

end - +
- +

The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.

@@ -1126,10 +1107,10 @@

Parser invocation.

  def ast
- +
- +

Add built-in rules for standard ABNF rules not

@@ -1143,10 +1124,10 @@

Parser invocation.

end - +
- +

Output formatted S-Expression of grammar

@@ -1155,10 +1136,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP) - +
- +

Output rules as a formatted S-Expression

diff --git a/examples/abnf/meta.rb b/examples/abnf/meta.rb index c1e8c8e..b5ce638 100644 --- a/examples/abnf/meta.rb +++ b/examples/abnf/meta.rb @@ -53,9 +53,7 @@ module ABNFMeta EBNF::Rule.new(:_comment_2, nil, [:alt, :WSP, :VCHAR]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:quoted_string, nil, [:seq, :DQUOTE, :_quoted_string_1, :DQUOTE], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_quoted_string_1, nil, [:star, :_quoted_string_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_quoted_string_2, nil, [:alt, :_quoted_string_3, :_quoted_string_4]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_quoted_string_3, nil, [:range, "#x20-#x21"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_quoted_string_4, nil, [:range, "#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_quoted_string_2, nil, [:range, "#x20-#x21#x23-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:bin_val, nil, [:seq, "b", :_bin_val_1, :_bin_val_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_bin_val_1, nil, [:plus, :BIT]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_bin_val_2, nil, [:opt, :_bin_val_3]).extend(EBNF::PEG::Rule), @@ -85,12 +83,8 @@ module ABNFMeta EBNF::Rule.new(:_hex_val_8, nil, [:plus, :HEXDIG]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:prose_val, nil, [:seq, "<", :_prose_val_1, ">"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_prose_val_1, nil, [:star, :_prose_val_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_prose_val_2, nil, [:alt, :_prose_val_3, :_prose_val_4]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_prose_val_3, nil, [:range, "#x20-#x3D"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_prose_val_4, nil, [:range, "#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:ALPHA, nil, [:alt, :_ALPHA_1, :_ALPHA_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ALPHA_1, nil, [:range, "#x41-#x5A"], kind: :terminal).extend(EBNF::PEG::Rule), - 
EBNF::Rule.new(:_ALPHA_2, nil, [:range, "#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_prose_val_2, nil, [:range, "#x20-#x3D#x3F-#x7E"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:BIT, nil, [:alt, "0", "1"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:CHAR, nil, [:range, "#x01-#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:CR, nil, [:hex, "#x0D"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -101,7 +95,8 @@ module ABNFMeta EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, "A", "B", "C", "D", "E", "F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/examples/ebnf-ll1-parser/README.md b/examples/ebnf-ll1-parser/README.md index f347b9a..e30dcd7 100644 --- a/examples/ebnf-ll1-parser/README.md +++ b/examples/ebnf-ll1-parser/README.md @@ -32,8 +32,9 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal 
RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) - (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" @@ -47,8 +48,8 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal PASS "23" (plus (alt - (range "#x9#xA#xD#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) + (range "#x00-#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/examples/ebnf-peg-parser/README.md b/examples/ebnf-peg-parser/README.md index 0981538..a51ccb3 100644 --- a/examples/ebnf-peg-parser/README.md +++ b/examples/ebnf-peg-parser/README.md @@ -32,8 +32,9 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) - (terminal RANGE "16" (seq "[" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) - (terminal O_RANGE "17" (seq "[^" (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX)) "]")) + (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) + (terminal O_RANGE "17" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) (terminal CHAR "20" 
@@ -47,8 +48,8 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (terminal PASS "23" (plus (alt - (range "#x9#xA#xD#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) + (range "#x00-#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#x0A#x0Dx"))) (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) ) diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index 535c52b..2aaa876 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -47,13 +47,15 @@ module EBNFPegMeta EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, :_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], 
kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), @@ -69,12 +71,12 @@ module EBNFPegMeta EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x00-#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#x0A#x0Dx"], kind: 
:terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/examples/isoebnf/README.md b/examples/isoebnf/README.md index 2789438..957fb6e 100644 --- a/examples/isoebnf/README.md +++ b/examples/isoebnf/README.md @@ -32,50 +32,39 @@ This generates a [S-Expression][] form of the grammar suitable for use by {EBNF} (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) - (terminal terminal_string + (rule letter + (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" + "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" + "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" )) + (rule decimal_digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule integer (seq decimal_digit (star decimal_digit))) + (rule meta_identifier (seq letter (star meta_identifier_character))) + (rule meta_identifier_character (alt letter decimal_digit "_")) + (rule terminal_string (alt - (seq "'" (plus first_terminal_character) "'") - (seq "\"" (plus second_terminal_character) "\"")) ) - (terminal meta_identifier (seq letter (star meta_identifier_character))) - (terminal integer (plus decimal_digit)) - (terminal special_sequence (seq "?" 
(star special_sequence_character) "?")) - (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) - (terminal letter (range "a-zA-Z")) - (terminal decimal_digit (range "0-9")) - (terminal meta_identifier_character (alt letter decimal_digit "_")) - (terminal first_terminal_character (diff terminal_character "'")) - (terminal second_terminal_character (diff terminal_character "\"")) - (terminal special_sequence_character (diff terminal_character "?")) - (terminal terminal_character + (seq (seq "'" first_terminal_character (star first_terminal_character) "'")) + (seq (seq "\"" second_terminal_character (star second_terminal_character) "\""))) ) + (rule first_terminal_character (seq terminal_character)) + (rule second_terminal_character (seq terminal_character)) + (rule special_sequence (seq "?" (star special_sequence_character) "?")) + (rule special_sequence_character (seq terminal_character)) + (rule terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol end_option_symbol end_repeat_symbol except_symbol first_quote_symbol repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) - (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) - (pass _pass (alt (plus gap_separator) comment)) - (terminal empty (seq ())) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt 
(diff terminal_character (range "'\"")) terminal_string)) - (terminal repetition_symbol (seq "*")) - (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) - (terminal first_quote_symbol (seq "'")) - (terminal second_quote_symbol (seq "\"")) - (terminal start_comment_symbol (seq "(*")) - (terminal end_comment_symbol (seq "*)")) - (terminal start_group_symbol (seq "(")) - (terminal end_group_symbol (seq ")")) - (terminal special_sequence_symbol (seq "?"))) + (rule other_character + (alt " " ":" "+" "_" "%" "@" "&" "#" "$" "<" ">" "\\" "^" "`" "~")) + (rule empty (seq "")) + (rule defining_symbol (alt "=" ":")) + (rule definition_separator_symbol (alt "|" "/" "!")) + (rule terminator_symbol (alt ";" ".")) + (rule start_option_symbol (alt "[" "(/")) + (rule end_option_symbol (alt "]" "/)")) + (rule start_repeat_symbol (alt "{" "(:")) + (rule end_repeat_symbol (alt "}" ":)"))) This can then be used as input to {EBNF.parse} to transform [EBNF][] to [PEG][] for parsing examples of the grammar using {EBNF::PEG::Parser}. diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf index 8bcda08..90084f1 100644 --- a/examples/isoebnf/examples/iso-ebnf.isoebnf +++ b/examples/isoebnf/examples/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols '_' or "_" *); + between the quote symbols ’_’ or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. 
*); -comment = '(*', {comment_symbol}, '*)' +comment = ’(*’, {comment_symbol}, ’*)’ (* A comment is allowed anywhere outside a , , or *); diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index 9f607bc..af3bcbf 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -6,11 +6,11 @@ module EBNF::Terminals HEX = %r(\#x\h+)u.freeze CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze - RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))\])u.freeze + RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze - O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))\])u.freeze + O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze O_ENUM = %r(\[^#{R_CHAR}+\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index f58b4de..4e01c63 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -347,10 +347,18 @@ def format_abnf_char(c) # FIXME: O_RANGE def format_abnf_range(string) if string.include?('-') + # Might include multiple ranges + # #x01-#x03#x05-#x06 + # a-bc-d dash = (@options[:html] ? "- " : "-") # Split into separate range segments if string.start_with?('#x') - '%x' + string[2..-1].gsub('#x', '') + ranges = [] + scanner = StringScanner.new(string) + while !scanner.eos? 
+ ranges << scanner.scan(/#x\h+-#x\h+/) + end + ranges.map {|range|"%x" + range.gsub('#x', '').sub('-', dash)}.join(" / ") else '%d' + string.gsub(/[^-]/) {|c| c.ord} end @@ -465,31 +473,55 @@ def format_isoebnf(expr, sep: nil, embedded: false) # FIXME: O_RANGE def format_isoebnf_range(string) chars = [] + scanner = StringScanner.new(string) if string.include?('-') - first, last = if string.start_with?('#x') - string.split('-').map {|h| h[2..-1].hex.ord} + ranges = [] + # Might include multiple ranges + # #x01-#x03#x05-#x06 + # a-bc-d + # Split into separate range segments + if string.start_with?('#x') + while !scanner.eos? + ranges << scanner.scan(/#x\h+-#x\h+/) + end + ranges.each do |range| + first, last = range.split('-').map {|h| h[2..-1].hex.ord} + while first <= last + c = first.chr(Encoding::UTF_8) + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + first += 1 + end + end else - string.split('-').map {|c| c.ord} - end - while first <= last - c = first.chr(Encoding::UTF_8) - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c - first += 1 + while !scanner.eos? + r = scanner.scan(/.-./) + require 'byebug'; byebug unless r + ranges << r + end + ranges.each do |range| + first, last = range.split('-').map {|c| c.ord} + while first <= last + c = first.chr(Encoding::UTF_8) + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c + first += 1 + end + end end else - scanner = StringScanner.new(string) while !scanner.eos? 
- c = if h = scanner.scan(/#x\h+/) - h[2..-1].hex.ord.chr(Encoding::UTF_8) + c = if hex = scanner.scan(/#x\h+/) + hex[2..-1].hex.ord.chr(Encoding::UTF_8) else scanner.scan(/./) end - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c end + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) + chars << c end end diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 28a9a10..4e7d270 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -106,11 +106,16 @@ %{foo | x xlist} => %{(alt foo (seq x xlist))}, %{a | (b - c)} => %{(alt a (diff b c))}, %{a b | c d} => %{(alt (seq a b) (seq c d))}, + %{[a-z]} => %{(range "a-z")}, + %{[a-zA-Z]} => %{(range "a-zA-Z")}, + %{[#x20-#x22]} => %{(range "#x20-#x22")}, + %{[abc]} => %{(range "abc")}, + %{[#x20#x21#x22]} => %{(range "#x20#x21#x22")}, %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => %{(alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))}, %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} => - %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")} + %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")}, }.each do |input, expected| it "given #{input.inspect} produces #{expected}" do rule = parse("rule ::= #{input}").ast.first diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index e63beac..e042e55 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -1010,10 +1010,6 @@ "a ::= [a-b-c]", /syntax error/ ], - "extra range (2)": [ - "a ::= [a-zA-Z]", - /syntax error/ - ], }.each do |name, (rule, message)| it name do expect {EBNF.parse(rule, validate: true)}.to raise_error SyntaxError, message From 20aa3e56b53504193160e9b2cbd0c9e7839560b3 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Tue, 7 Jul 2020 16:51:35 -0700 Subject: [PATCH 30/50] Allow enums to end with a '-'. --- etc/ebnf.ebnf | 4 ++-- etc/ebnf.html | 4 ++-- etc/ebnf.ll1.sxp | 4 ++-- etc/ebnf.peg.rb | 12 ++++++------ etc/ebnf.peg.sxp | 12 ++++++------ etc/ebnf.sxp | 4 ++-- etc/sparql.ebnf | 8 ++++---- etc/sparql.sxp | 11 ++++------- etc/turtle.ebnf | 12 ++++++------ etc/turtle.sxp | 14 ++++++-------- lib/ebnf/terminals.rb | 4 ++-- lib/ebnf/writer.rb | 4 ++-- spec/parser_spec.rb | 1 + spec/rule_spec.rb | 18 ++++++++---------- spec/writer_spec.rb | 4 ++++ 15 files changed, 57 insertions(+), 59 deletions(-) diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index 19d2824..b9e7554 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -38,9 +38,9 @@ [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+ - [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS # exclusively R_CHAR or HEX + [14] ENUM ::= ('[' (R_CHAR+ | HEX+) '-'? 
']') - LHS # exclusively R_CHAR or HEX - [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' + [15] O_ENUM ::= '[^' (R_CHAR+ | HEX+) '-'? ']' # both ENUM and O_ENUM can end with '-' [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' diff --git a/etc/ebnf.html b/etc/ebnf.html index 1bf3054..84cc864 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -117,7 +117,7 @@ ENUM ::= -(("[" R_CHAR+) | (HEX+ "]")) - LHS +("[" (R_CHAR+ | HEX+) "-"? "]") - LHS @@ -125,7 +125,7 @@ O_ENUM ::= -("[^" R_CHAR+) | (HEX+ "]") +"[^" (R_CHAR+ | HEX+) "-"? "]" diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index dcbaed2..008eeb3 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -101,8 +101,8 @@ (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) - (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) - (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) + (terminal ENUM "14" (diff (seq "[" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]") LHS)) + (terminal O_ENUM "15" (seq "[^" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]")) (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal O_RANGE "17" (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index ecf7edc..d5785b2 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -36,16 +36,16 @@ module Meta EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_1, "14.1", [:alt, :_ENUM_2, :_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), - 
EBNF::Rule.new(:_ENUM_2, "14.2", [:seq, "[", :_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_1, "14.1", [:seq, "[", :_ENUM_2, :_ENUM_3, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_2, "14.2", [:alt, :_ENUM_4, :_ENUM_5], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_3, "14.3", [:seq, :_ENUM_5, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_ENUM, "15", [:alt, :_O_ENUM_1, :_O_ENUM_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_1, "15.1", [:seq, "[^", :_O_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_ENUM_3, "14.3", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_ENUM, "15", [:seq, "[^", :_O_ENUM_1, :_O_ENUM_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_1, "15.1", [:alt, :_O_ENUM_3, :_O_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_ENUM_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 6e16b19..87f6147 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -34,16 +34,16 @@ (terminal _HEX_4 "13.4" (range "A-F")) 
(terminal _HEX_5 "13.5" (range "0-9")) (terminal ENUM "14" (diff _ENUM_1 LHS)) - (terminal _ENUM_1 "14.1" (alt _ENUM_2 _ENUM_3)) - (terminal _ENUM_2 "14.2" (seq "[" _ENUM_4)) + (terminal _ENUM_1 "14.1" (seq "[" _ENUM_2 _ENUM_3 "]")) + (terminal _ENUM_2 "14.2" (alt _ENUM_4 _ENUM_5)) (terminal _ENUM_4 "14.4" (plus R_CHAR)) - (terminal _ENUM_3 "14.3" (seq _ENUM_5 "]")) (terminal _ENUM_5 "14.5" (plus HEX)) - (terminal O_ENUM "15" (alt _O_ENUM_1 _O_ENUM_2)) - (terminal _O_ENUM_1 "15.1" (seq "[^" _O_ENUM_3)) + (terminal _ENUM_3 "14.3" (opt "-")) + (terminal O_ENUM "15" (seq "[^" _O_ENUM_1 _O_ENUM_2 "]")) + (terminal _O_ENUM_1 "15.1" (alt _O_ENUM_3 _O_ENUM_4)) (terminal _O_ENUM_3 "15.3" (plus R_CHAR)) - (terminal _O_ENUM_2 "15.2" (seq _O_ENUM_4 "]")) (terminal _O_ENUM_4 "15.4" (plus HEX)) + (terminal _O_ENUM_2 "15.2" (opt "-")) (terminal RANGE "16" (seq "[" _RANGE_1 "]")) (terminal _RANGE_1 "16.1" (plus _RANGE_2)) (terminal _RANGE_2 "16.2" (alt _RANGE_3 _RANGE_4)) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index a2be331..a806ef1 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -13,8 +13,8 @@ (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) - (terminal ENUM "14" (diff (alt (seq "[" (plus R_CHAR)) (seq (plus HEX) "]")) LHS)) - (terminal O_ENUM "15" (alt (seq "[^" (plus R_CHAR)) (seq (plus HEX) "]"))) + (terminal ENUM "14" (diff (seq "[" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]") LHS)) + (terminal O_ENUM "15" (seq "[^" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]")) (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) (terminal O_RANGE "17" (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) diff --git a/etc/sparql.ebnf b/etc/sparql.ebnf index ed4faee..9501e95 100644 --- a/etc/sparql.ebnf +++ b/etc/sparql.ebnf @@ 
-249,7 +249,7 @@ [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? [143] VAR1 ::= '?' VARNAME [144] VAR2 ::= '$' VARNAME - [145] LANGTAG ::= '@' ([a-z] | [A-Z])+ ('-' ([a-z] | [A-Z] | [0-9])+)* + [145] LANGTAG ::= '@' ([a-zA-Z])+ ('-' ([a-zA-Z0-9])+)* [146] INTEGER ::= [0-9]+ [147] DECIMAL ::= [0-9]* '.' [0-9]+ [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT @@ -260,7 +260,7 @@ [152] INTEGER_NEGATIVE ::= '-' INTEGER [153] DECIMAL_NEGATIVE ::= '-' DECIMAL [154] DOUBLE_NEGATIVE ::= '-' DOUBLE - [155] EXPONENT ::= [eE] [#x2b#x2d]? [0-9]+ + [155] EXPONENT ::= [eE] [+-]? [0-9]+ [156] STRING_LITERAL1 ::= "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'" [157] STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"' [158] STRING_LITERAL_LONG1 ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR ) )* "'''" @@ -269,7 +269,7 @@ [161] NIL ::= '(' WS* ')' [162] WS ::= #x20 | #x9 | #xD | #xA [163] ANON ::= '[' WS* ']' - [164] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] + [164] PN_CHARS_BASE ::= [A-Za-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] @@ -283,6 +283,6 @@ [169] PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? [170] PLX ::= PERCENT | PN_LOCAL_ESC [171] PERCENT ::= '%' HEX HEX - [172] HEX ::= [0-9] | [A-F] | [a-f] + [172] HEX ::= [0-9A-Fa-f] [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' ) \ No newline at end of file diff --git a/etc/sparql.sxp b/etc/sparql.sxp index 19622d5..414d742 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -291,9 +291,7 @@ (terminal VAR1 "143" (seq "?" 
VARNAME)) (terminal VAR2 "144" (seq "$" VARNAME)) (terminal LANGTAG "145" - (seq "@" - (plus (alt (range "a-z") (range "A-Z"))) - (star (seq "-" (plus (alt (range "a-z") (range "A-Z") (range "0-9")))))) ) + (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9")))))) (terminal INTEGER "146" (plus (range "0-9"))) (terminal DECIMAL "147" (seq (star (range "0-9")) "." (plus (range "0-9")))) (terminal DOUBLE "148" @@ -307,7 +305,7 @@ (terminal INTEGER_NEGATIVE "152" (seq "-" INTEGER)) (terminal DECIMAL_NEGATIVE "153" (seq "-" DECIMAL)) (terminal DOUBLE_NEGATIVE "154" (seq "-" DOUBLE)) - (terminal EXPONENT "155" (seq (range "eE") (opt (range "#x2b#x2d")) (plus (range "0-9")))) + (terminal EXPONENT "155" (seq (range "eE") (opt (range "+-")) (plus (range "0-9")))) (terminal STRING_LITERAL1 "156" (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR)) "'")) (terminal STRING_LITERAL2 "157" @@ -322,8 +320,7 @@ (terminal ANON "163" (seq "[" (star WS) "]")) (terminal PN_CHARS_BASE "164" (alt - (range "A-Z") - (range "a-z") + (range "A-Za-z") (range "#x00C0-#x00D6") (range "#x00D8-#x00F6") (range "#x00F8-#x02FF") @@ -358,7 +355,7 @@ (seq (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS)))) (terminal PLX "170" (alt PERCENT PN_LOCAL_ESC)) (terminal PERCENT "171" (seq "%" HEX HEX)) - (terminal HEX "172" (alt (range "0-9") (range "A-F") (range "a-f"))) + (terminal HEX "172" (range "0-9A-Fa-f")) (terminal PN_LOCAL_ESC "173" (seq "\\" (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#" diff --git a/etc/turtle.ebnf b/etc/turtle.ebnf index 6e45726..9b2024e 100644 --- a/etc/turtle.ebnf +++ b/etc/turtle.ebnf @@ -30,11 +30,11 @@ [139s] PNAME_NS ::= PN_PREFIX? ":" [140s] PNAME_LN ::= PNAME_NS PN_LOCAL [141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? -[144s] LANGTAG ::= "@" ([a-z] | [A-Z])+ ( "-" ([a-z] | [A-Z] | [0-9])+ )* -[19] INTEGER ::= [#x2b#x2d]? [0-9]+ -[20] DECIMAL ::= [#x2b#x2d]? 
( ([0-9])* '.' ([0-9])+ ) -[21] DOUBLE ::= [#x2b#x2d]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT ) -[154s] EXPONENT ::= [eE] [#x2b#x2d]? [0-9]+ +[144s] LANGTAG ::= "@" ([a-zA-Z])+ ( "-" ([a-zA-Z0-9])+ )* +[19] INTEGER ::= [+-]? [0-9]+ +[20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ ) +[21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT ) +[154s] EXPONENT ::= [eE] [+-]? [0-9]+ [22] STRING_LITERAL_QUOTE ::= '"' ( [^#x22#x5C#xA#xD] | ECHAR | UCHAR )* '"' [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ( [^#x27#x5C#xA#xD] | ECHAR | UCHAR )* "'" [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" ( ( "'" | "''" )? ( [^'\] | ECHAR | UCHAR ) )* "'''" @@ -65,6 +65,6 @@ [168s] PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ? [169s] PLX ::= PERCENT | PN_LOCAL_ESC [170s] PERCENT ::= '%' HEX HEX -[171s] HEX ::= [0-9] | [A-F] | [a-f] +[171s] HEX ::= [0-9A-Fa-f] [172s] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' ) \ No newline at end of file diff --git a/etc/turtle.sxp b/etc/turtle.sxp index b6309fd..d095e2f 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -36,20 +36,18 @@ (terminal BLANK_NODE_LABEL "141s" (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS)))) (terminal LANGTAG "144s" - (seq "@" - (plus (alt (range "a-z") (range "A-Z"))) - (star (seq "-" (plus (alt (range "a-z") (range "A-Z") (range "0-9")))))) ) - (terminal INTEGER "19" (seq (opt (range "#x2b#x2d")) (plus (range "0-9")))) + (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9")))))) + (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9")))) (terminal DECIMAL "20" - (seq (opt (range "#x2b#x2d")) (seq (star (range "0-9")) "." (plus (range "0-9"))))) + (seq (opt (range "+-")) (seq (star (range "0-9")) "." 
(plus (range "0-9"))))) (terminal DOUBLE "21" (seq - (opt (range "#x2b#x2d")) + (opt (range "+-")) (alt (seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT) (seq "." (plus (range "0-9")) EXPONENT) (seq (plus (range "0-9")) EXPONENT)) )) - (terminal EXPONENT "154s" (seq (range "eE") (opt (range "#x2b#x2d")) (plus (range "0-9")))) + (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9")))) (terminal STRING_LITERAL_QUOTE "22" (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\"")) (terminal STRING_LITERAL_SINGLE_QUOTE "23" @@ -97,7 +95,7 @@ (opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) ) (terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC)) (terminal PERCENT "170s" (seq "%" HEX HEX)) - (terminal HEX "171s" (alt (range "0-9") (range "A-F") (range "a-f"))) + (terminal HEX "171s" (range "0-9A-Fa-f")) (terminal PN_LOCAL_ESC "172s" (seq "\\" (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#" diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index af3bcbf..cf047a2 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -7,11 +7,11 @@ module EBNF::Terminals CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze - ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)\])u.freeze + ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)-?\])u.freeze ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze - O_ENUM = %r(\[^#{R_CHAR}+\])u.freeze + O_ENUM = %r(\[^#{ENUM_BASE}\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = 
%r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze POSTFIX = %r([?*+])u.freeze diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 4e01c63..30c9523 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -346,7 +346,7 @@ def format_abnf_char(c) # Format a range # FIXME: O_RANGE def format_abnf_range(string) - if string.include?('-') + if string.include?('-') && !string.end_with?('-') # Might include multiple ranges # #x01-#x03#x05-#x06 # a-bc-d @@ -474,7 +474,7 @@ def format_isoebnf(expr, sep: nil, embedded: false) def format_isoebnf_range(string) chars = [] scanner = StringScanner.new(string) - if string.include?('-') + if string.include?('-') && !string.end_with?('-') ranges = [] # Might include multiple ranges # #x01-#x03#x05-#x06 diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 4e7d270..c72935b 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -110,6 +110,7 @@ %{[a-zA-Z]} => %{(range "a-zA-Z")}, %{[#x20-#x22]} => %{(range "#x20-#x22")}, %{[abc]} => %{(range "abc")}, + %{[abc-]} => %{(range "abc-")}, %{[#x20#x21#x22]} => %{(range "#x20#x21#x22")}, %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index e042e55..f6198db 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -499,11 +499,13 @@ describe "#to_regexp" do { - hex: ["#x20", / /], - range: ["a-b", /[a-b]/], - }.each do |title, (exp, regexp)| + hex: [:hex, "#x20", / /], + range: [:range, "a-b", /[a-b]/], + range2: [:range, "a-zA-Z", /[a-zA-Z]/], + range3: [:range, "abc-", /[abc-]/], + }.each do |title, (op, exp, regexp)| it title do - expect(EBNF::Rule.new(title, nil, [title, exp]).to_regexp).to eql regexp + expect(EBNF::Rule.new(title, nil, [op, exp]).to_regexp).to eql regexp end end @@ -914,8 +916,8 @@ LHS: ["["], SYMBOL: ["a-z", "A-Z", "0-9", "_", "."], HEX: ["#x"], - ENUM: ["[", :HEX, :LHS], - O_ENUM: ["[^", :HEX], + ENUM: ["[", :LHS], + O_ENUM: ["[^"], RANGE: ["["], O_RANGE: ["[^"], STRING1: ['"'], @@ -999,10 +1001,6 @@ /syntax error/ ], "incomplete range": [ - "a ::= [a-]", - /syntax error/ - ], - "incomplete range (2)": [ "a ::= [-b]", /syntax error/ ], diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index cc18919..bf7ec5b 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -123,6 +123,10 @@ [:alt, :A, :B], "A | B" ], + "enum": [ + [:range, "abc-"], + "[abc-]" + ], "hex": [ [:hex, "#x20"], "#x20" From e79b96c884d6b232e3bae0a344b71eddc6e049bb Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 8 Jul 2020 15:32:28 -0700 Subject: [PATCH 31/50] Use Erubis instead of Haml for HTML formatting, as it preserves leading whitespace. Add abbr elements around escaped hex codes. 
--- ebnf.gemspec | 2 +- etc/abnf.sxp | 59 ++++---- etc/ebnf.html | 353 ++++++++++++++++++-------------------------- etc/iso-ebnf.sxp | 86 +++++------ lib/ebnf/writer.rb | 139 ++++++++++++----- spec/writer_spec.rb | 173 +++++++++++----------- 6 files changed, 412 insertions(+), 400 deletions(-) diff --git a/ebnf.gemspec b/ebnf.gemspec index 4d07e55..10cff8b 100755 --- a/ebnf.gemspec +++ b/ebnf.gemspec @@ -29,7 +29,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'rdf', '~> 3.1' # Required by sxp gem.add_development_dependency 'rdf-spec', '~> 3.1' gem.add_development_dependency 'rdf-turtle', '~> 3.1' - gem.add_development_dependency 'haml', '~> 5.0' + gem.add_development_dependency 'erubis', '~> 2.7' gem.add_development_dependency 'nokogiri', '~> 1.10' gem.add_development_dependency 'rspec', '~> 3.9' gem.add_development_dependency 'rspec-its', '~> 1.3' diff --git a/etc/abnf.sxp b/etc/abnf.sxp index 2ccf6ed..30c0aa7 100644 --- a/etc/abnf.sxp +++ b/etc/abnf.sxp @@ -1,5 +1,32 @@ ( - (terminal ALPHA (alt (range "#x41-#x5A") (range "#x61-#x7A"))) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) + (rule rule (seq rulename defined_as elements c_nl)) + (rule elements (seq alternation (star c_wsp))) + (rule alternation + (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) + (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) + (rule repetition (seq (opt repeat) element)) + (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) + (rule element (alt rulename group option char_val num_val prose_val)) + (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) + (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) + (rule char_val (alt case_insensitive_string case_sensitive_string)) + (rule case_insensitive_string (seq (opt "%i") quoted_string)) + (rule case_sensitive_string (seq "%s" quoted_string)) + (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminal 
rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) + (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) + (terminal c_wsp (alt WSP (seq c_nl WSP))) + (terminal c_nl (alt COMMENT CRLF)) + (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) + (terminal quoted_string (seq DQUOTE (star (range "#x20-#x21#x23-#x7E")) DQUOTE)) + (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) + (terminal dec_val + (seq "d" (plus DIGIT) (opt (alt (plus (seq "." (plus DIGIT))) (seq "-" (plus DIGIT)))))) + (terminal hex_val + (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG)))))) + (terminal prose_val (seq "<" (star (range "#x20-#x3D#x3F-#x7E")) ">")) + (terminal ALPHA (range "#x41-#x5A#x61-#x7A")) (terminal BIT (alt "0" "1")) (terminal CHAR (range "#x01-#x7F")) (terminal CR (hex "#x0D")) @@ -14,32 +41,4 @@ (terminal OCTET (range "#x00-#xFF")) (terminal SP (hex "#x20")) (terminal VCHAR (range "#x21-#x7E")) - (terminal WSP (alt SP HTAB)) - (rule alternation - (seq concatenation (star (seq (star c_wsp) "/" (star c_wsp) concatenation)))) - (terminal bin_val (seq "b" (plus BIT) (opt (alt (plus (seq "." (plus BIT))) (seq "-" (plus BIT)))))) - (terminal c_nl (alt COMMENT CRLF)) - (terminal c_wsp (alt WSP (seq c_nl WSP))) - (rule case_insensitive_string (seq (opt "%i") quoted_string)) - (rule case_sensitive_string (seq "%s" quoted_string)) - (rule char_val (alt case_insensitive_string case_sensitive_string)) - (terminal comment (seq ";" (star (alt WSP VCHAR)) CRLF)) - (rule concatenation (seq repetition (star (seq (plus c_wsp) repetition)))) - (terminal dec_val - (seq "d" (plus DIGIT) (opt (alt (plus (seq "." 
(plus DIGIT))) (seq "-" (plus DIGIT)))))) - (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) - (rule element (alt rulename group option char_val num_val prose_val)) - (rule elements (seq alternation (star c_wsp))) - (rule group (seq "(" (star c_wsp) alternation (star c_wsp) ")")) - (terminal hex_val - (seq "x" (plus HEXDIG) (opt (alt (plus (seq "." (plus HEXDIG))) (seq "-" (plus HEXDIG)))))) - (rule num_val (seq "%" (alt bin_val dec_val hex_val))) - (rule option (seq "[" (star c_wsp) alternation (star c_wsp) "]")) - (terminal prose_val (seq "<" (star (alt (range "#x20-#x3D") (range "#x3F-#x7E"))) ">")) - (terminal quoted_string - (seq DQUOTE (star (alt (range "#x20-#x21") (range "#x23-#x7E"))) DQUOTE)) - (rule repeat (alt (seq (star DIGIT) "*" (star DIGIT)) (plus DIGIT))) - (rule repetition (seq (opt repeat) element)) - (rule rule (seq rulename defined_as elements c_nl)) - (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) - (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-"))))) + (terminal WSP (alt SP HTAB))) diff --git a/etc/ebnf.html b/etc/ebnf.html index 84cc864..68faf9a 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -1,207 +1,148 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +
[1]ebnf::= -(declaration | rule)* -
[2]declaration::= -"@terminals" | pass -
[3]rule::= -LHS expression -
[4]expression::= -alt -
[5]alt::= -seq ("|" seq)* -
[6]seq::= -diff+ -
[7]diff::= -postfix ("-" postfix)? -
[8]postfix::= -primary POSTFIX? -
[9]primary::= -HEX -| SYMBOL -| ENUM -| O_ENUM -| RANGE -| O_RANGE -| STRING1 -| STRING2 -| ("(" expression ")") -
[10]pass::= -"@pass" expression -
[11]LHS::= -("[" SYMBOL "]" #x20+)? SYMBOL #x20* "::=" -
[12]SYMBOL::= -([a-z] | [A-Z] | [0-9] | "_" | ".")+ -
[13]HEX::= -"#x" ([a-f] | [A-F] | [0-9])+ -
[14]ENUM::= -("[" (R_CHAR+ | HEX+) "-"? "]") - LHS -
[15]O_ENUM::= -"[^" (R_CHAR+ | HEX+) "-"? "]" -
[16]RANGE::= -"[" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" -
[17]O_RANGE::= -"[^" ((R_CHAR "-" R_CHAR) | (HEX "-" HEX))+ "]" -
[18]STRING1::= -'"' (CHAR - '"')* '"' -
[19]STRING2::= -"'" (CHAR - "'")* "'" -
[20]CHAR::= -[#x9#xA#xD] -| [#x20-#xD7FF] -| [#xE000-#xFFFD] -| [#x10000-#x10FFFF] -
[21]R_CHAR::= -CHAR - ("]" | "-") -
[22]POSTFIX::= -[?*+] -
[23]PASS::= -([#x9#xA#xD#x20] | ((("#" - "#x") | "//") [^#xA#xD]*) | ("/*" (("*" [^/])? | [^*])* "*/") | ("(*" (("*" [^)])? | [^*])* "*)"))+ -
-@pass - -PASS -
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[1]ebnf::=( declaration | rule) *
[2]declaration::="@terminals" | pass
[3]rule::=LHS expression
[4]expression::=alt
[5]alt::=seq ( "|" seq) *
[6]seq::=diff+
[7]diff::=postfix ( "-" postfix) ?
[8]postfix::=primary POSTFIX?
[9]primary::=HEX | SYMBOL | ENUM | O_ENUM | RANGE | O_RANGE | STRING1 | STRING2 | ( "(" expression ")")
[10]pass::="@pass" expression
[11]LHS::=( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::="
[12]SYMBOL::=( [ a-z] | [ A-Z] | [ 0-9] | "_" | ".") +
[13]HEX::="#x" ( [ a-f] | [ A-F] | [ 0-9] ) +
[14]ENUM::=( "[" ( R_CHAR+ | HEX+ ) "-"? "]") - LHS
[15]O_ENUM::="[^" ( R_CHAR+ | HEX+ ) "-"? "]"
[16]RANGE::="[" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[17]O_RANGE::="[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[18]STRING1::='"' ( CHAR - '"') * '"'
[19]STRING2::="'" ( CHAR - "'") * "'"
[20]CHAR::=[ #x09#x0A#x0D] | [ #x20-#xD7FF] | [ #xE000-#xFFFD] | [ #x00010000-#x0010FFFF]
[21]R_CHAR::=CHAR - ( "]" | "-")
[22]POSTFIX::=[ ?*+]
[23]PASS::=( [ #x09#x0A#x0D#x20] | ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) | ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") | ( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)") ) +
@passPASS
+ diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 24fd9d4..ebe6127 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -1,53 +1,36 @@ ( - (pass _pass (alt (plus gap_separator) comment)) - (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) - (terminal concatenate_symbol (seq ",")) - (terminal decimal_digit (range "0-9")) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) + (rule syntax (star syntax_rule)) + (rule syntax_rule + (seq meta_identifier defining_symbol definitions_list terminator_symbol)) (rule definitions_list (seq single_definition (star (seq definition_separator_symbol definitions_list)))) - (terminal empty (seq ())) - (terminal end_comment_symbol (seq "*)")) - (terminal end_group_symbol (seq ")")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal except_symbol (seq "-")) + (rule single_definition (seq term (star (seq "," term)))) + (rule term (seq factor (opt (seq "-" exception)))) (rule exception (seq factor)) (rule factor (seq (opt (seq integer "*")) primary)) - (terminal first_quote_symbol (seq "'")) - (terminal first_terminal_character (diff terminal_character "'")) - (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) - (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) - (rule grouped_sequence (seq "(" definitions_list ")")) - (terminal integer (plus decimal_digit)) - (terminal letter (alt (range "a-z") (range "A-Z"))) - (terminal meta_identifier (seq letter (star meta_identifier_character))) - (terminal meta_identifier_character (alt letter decimal_digit "_")) - (rule optional_sequence - (seq start_option_symbol definitions_list end_option_symbol)) - (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (rule primary (alt optional_sequence 
repeated_sequence special_sequence grouped_sequence meta_identifier terminal_string empty )) + (rule optional_sequence + (seq start_option_symbol definitions_list end_option_symbol)) (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) - (terminal repetition_symbol (seq "*")) - (terminal second_quote_symbol (seq "\"")) - (terminal second_terminal_character (diff terminal_character "\"")) - (rule single_definition (seq term (star (seq "," term)))) + (rule grouped_sequence (seq "(" definitions_list ")")) + (terminal terminal_string + (alt + (seq "'" (plus first_terminal_character) "'") + (seq "\"" (plus second_terminal_character) "\"")) ) + (terminal meta_identifier (seq letter (star meta_identifier_character))) + (terminal integer (plus decimal_digit)) (terminal special_sequence (seq "?" (star special_sequence_character) "?")) + (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) + (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal letter (range "a-zA-Z")) + (terminal decimal_digit (range "0-9")) + (terminal meta_identifier_character (alt letter decimal_digit "_")) + (terminal first_terminal_character (diff terminal_character "'")) + (terminal second_terminal_character (diff terminal_character "\"")) (terminal special_sequence_character (diff terminal_character "?")) - (terminal special_sequence_symbol (seq "?")) - (terminal start_comment_symbol (seq "(*")) - (terminal start_group_symbol (seq "(")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal start_repeat_symbol (alt "{" "(:")) - (rule syntax (star syntax_rule)) - (rule syntax_rule - (seq meta_identifier defining_symbol definitions_list terminator_symbol)) - (rule term (seq factor (opt (seq "-" exception)))) (terminal terminal_character (alt letter decimal_digit concatenate_symbol defining_symbol definition_separator_symbol end_comment_symbol end_group_symbol @@ -55,8 +38,25 @@ 
repetition_symbol second_quote_symbol special_sequence_symbol start_comment_symbol start_group_symbol start_option_symbol start_repeat_symbol terminator_symbol other_character )) - (terminal terminal_string - (alt - (seq "'" (plus first_terminal_character) "'") - (seq "\"" (plus second_terminal_character) "\"")) ) - (terminal terminator_symbol (alt ";" "."))) + (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) + (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) + (pass _pass (alt (plus gap_separator) comment)) + (terminal empty (seq "")) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (alt "[" "(/")) + (terminal end_option_symbol (alt "]" "/)")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal repetition_symbol (seq "*")) + (terminal except_symbol (seq "-")) + (terminal concatenate_symbol (seq ",")) + (terminal first_quote_symbol (seq "'")) + (terminal second_quote_symbol (seq "\"")) + (terminal start_comment_symbol (seq "(*")) + (terminal end_comment_symbol (seq "*)")) + (terminal start_group_symbol (seq "(")) + (terminal end_group_symbol (seq ")")) + (terminal special_sequence_symbol (seq "?"))) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 30c9523..9a85909 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- require 'rdf' require 'strscan' unless defined?(StringScanner) +require "ostruct" ## # Serialize ruleset back to EBNF @@ -8,6 +9,43 @@ module EBNF class Writer LINE_LENGTH = 80 + # ASCII escape names + ASCII_ESCAPE_NAMES = [ + "null", #x00 + "start of heading", #x01 + "start of text", #x02 + "end of text", #x03 + "end of transmission", #x04 + "enquiry", #x05 + "acknowledge", #x06 + "bell", #x07 + 
"backspace", #x08 + "horizontal tab", #x09 + "new line", #x0A + "vertical tab", #x0B + "form feed", #x0C + "carriage return", #x0D + "shift out", #x0E + "shift in", #x0F + "data link escape", #x10 + "device control 1", #x11 + "device control 2", #x12 + "device control 3", #x13 + "device control 4", #x14 + "negative acknowledge", #x15 + "synchronous idle", #x16 + "end of trans. block", #x17 + "cancel", #x18 + "end of medium", #x19 + "substitute", #x1A + "escape", #x1B + "file separator", #x1C + "group separator", #x1D + "record separator", #x1E + "unit separator", #x1F + "space" #x20 + ].freeze + ## # Format rules to a String # @@ -63,7 +101,7 @@ def self.html(*rules, format: :ebnf) # @option options [Symbol] format # @option options [Boolean] html (false) def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) - @options = options.dup + @options = options.merge(html: html) return if rules.empty? # Determine max LHS length @@ -81,15 +119,17 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) if html # Output as formatted HTML begin - require 'haml' - hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules, format: format) do |rule| + require 'erubis' + eruby = Erubis::Eruby.new(ERB_DESC) + formatted_rules = rules.map do |rule| formatted_expr = self.send(format_meth, rule.expr) formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr + OpenStruct.new(id: rule.id, sym: rule.sym, pass: rule.pass?, formatted: formatted_expr) end - out.write hout + out.write eruby.evaluate(format: format, rules: formatted_rules) return rescue LoadError - $stderr.puts "Generating HTML requires haml gem to be loaded" + $stderr.puts "Generating HTML requires erubis gem to be loaded" end end @@ -124,7 +164,7 @@ def format_ebnf(expr, sep: nil, embedded: false) if expr.length == 1 return format_ebnf_char(expr) elsif expr =~ /\A#x\h+/ - return (@options[:html] ? 
%(#{expr}) : expr) + return format_ebnf_hex(expr[2..-1].hex.chr) elsif expr =~ /"/ return (@options[:html] ? %('#{escape_ebnf(expr, "'")}') : %('#{escape_ebnf(expr, "'")}')) else @@ -199,9 +239,11 @@ def format_ebnf(expr, sep: nil, embedded: false) # Format a single-character string, prefering hex for non-main ASCII def format_ebnf_char(c) case c.ord - when 0x22 then (@options[:html] ? %('"') : %{'"'}) - when (0x23..0x7e) then (@options[:html] ? %("#{c}") : %{"#{c}"}) - else (@options[:html] ? %(#{escape_ebnf_hex(c)}) : escape_ebnf_hex(c)) + when (0x21) then (@options[:html] ? %("#{c}") : %{"#{c}"}) + when 0x22 then (@options[:html] ? %('"') : %{'"'}) + when (0x23..0x7e) then (@options[:html] ? %("#{c}") : %{"#{c}"}) + when (0x80..0xFFFD) then (@options[:html] ? %("#{c}") : %{"#{c}"}) + else escape_ebnf_hex(c) end end @@ -218,11 +260,11 @@ def format_ebnf_range(string) when s.scan(/\A[!"\u0024-\u007e]+/) buffer << (@options[:html] ? %(#{s.matched}) : s.matched) when s.scan(/\A#x\h+/) - buffer << (@options[:html] ? %(#{s.matched}) : s.matched) + buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8)) when s.scan(/\A-/) buffer << dash else - buffer << (@options[:html] ? %(#{escape_ebnf_hex(s.getch)}) : escape_ebnf_hex(s.getch)) + buffer << escape_ebnf_hex(s.getch) end end buffer + rbrac @@ -243,11 +285,22 @@ def escape_ebnf(string, quote = '"') def escape_ebnf_hex(u) fmt = case u.ord + when 0x00..0x20 then "#x%02X" when 0x0000..0x00ff then "#x%02X" when 0x0100..0xffff then "#x%04X" else "#x%08X" end - sprintf(fmt, u.ord) + char = fmt % u.ord + if @options[:html] + if u.ord <= 0x20 + char = %(#{char}) + elsif u.ord == 0x7F + char = %(#{char}) + end + %(#{char}) + else + char + end end ## @@ -337,9 +390,9 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) # Format a single-character string, prefering hex for non-main ASCII def format_abnf_char(c) if /[\x20-\x21\x23-\x7E]/.match?(c) - return c.inspect + c.inspect else - (@options[:html] ? 
%(#{escape_abnf_hex(c)}) : escape_abnf_hex(c)) + escape_abnf_hex(c) end end @@ -373,11 +426,23 @@ def format_abnf_range(string) def escape_abnf_hex(u) fmt = case u.ord - when 0x0000..0x00ff then "%02X" - when 0x0100..0xffff then "%04X" - else "%08X" + when 0x0000..0x00ff then "#x%02X" + when 0x0100..0xffff then "#x%04X" + else "#x%08X" + end + char = "%x" + (fmt % u.ord) + if @options[:html] + if u.ord <= 0x20 + char = %(#{char}) + elsif u.ord == 0x7F + char = %(#{char}) + else + char = %(#{char}) + end + %(#{char}) + else + char end - "%x" + (fmt % u.ord) end ## @@ -525,25 +590,25 @@ def format_isoebnf_range(string) end end - HAML_DESC = %q( - %table.grammar - %tbody#grammar-productions - - rules.each do |rule| - %tr{id: "grammar-production-#{rule.sym}"} - - if rule.pass? - %td{colspan: (format == :ebnf && rule.id ? 4 : 3)} - %code<="@pass" - - else - - if format == :ebnf && rule.id - %td<= "[#{rule.id}]" - %td< - %code<= rule.sym - - if format == :ebnf - %td<= "::=" - - else - %td<= "=" - %td - != yield rule + ERB_DESC = %q( + + + <% for rule in @rules %> + + <% if rule.pass %> + + <% else %> + <% if rule.id %> + + <% end %> + + + <% end %> + + + <% end %> + +
@pass[<%==rule.id%>]<%== rule.sym %><%= @format == :ebnf ? '::=' : '='%><%= rule.formatted %>
).gsub(/^ /, '') end end diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index bf7ec5b..d216a34 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -101,13 +101,12 @@ '//tbody/tr/td[1]/text()': "[2]", '//tbody/tr/td[2]/code/text()': "Prolog", '//tbody/tr/td[3]/text()': "::=", - '//tbody/tr/td[4]/text()': /BaseDecl\? PrefixDecl\*/, + #'//tbody/tr/td[4]/*/text()': /BaseDecl\? PrefixDecl\*/, } ], }.each do |title, (grammar, xpaths)| context title do subject {EBNF::Writer.html(*EBNF::Base.new(grammar).ast)} - xpaths.each do |path, value| specify {is_expected.to have_xpath(path, value)} end @@ -115,92 +114,100 @@ end end - describe "#format_ebnf" do - subject {EBNF::Writer.new([])} + context "EBNF" do + describe "#format_ebnf" do + subject {EBNF::Writer.new([])} - { - "alt": [ - [:alt, :A, :B], - "A | B" - ], - "enum": [ - [:range, "abc-"], - "[abc-]" - ], + { + "alt": [ + [:alt, :A, :B], + "A | B" + ], + "enum": [ + [:range, "abc-"], + "[abc-]" + ], "hex": [ - [:hex, "#x20"], - "#x20" - ], - "istr": [ - [:istr, "foo"], - %("foo") - ], - "opt": [ - [:opt, :A], - "A?" - ], - "plus": [ - [:plus, :A], - "A+" - ], - "range": [ - [:range, "a-z"], - "[a-z]" - ], - "rept 0 1": [ - [:rept, 0, 1, :A], - "A?" - ], - "rept 0 *": [ - [:rept, 0, '*', :A], - "A*" - ], - "rept 1 1": [ - [:rept, 1, 1, :A], - "A" - ], - "rept 1 *": [ - [:rept, 1, '*', :A], - "A+" - ], - "rept 1 2": [ - [:rept, 1, 2, :A], - "A A?" - ], - "rept 1 3": [ - [:rept, 1, 3, :A], - "A (A A?)?" - ], - "rept 1 3 (A B)": [ - [:rept, 1, 3, [:seq, :A, :B]], - "(A B) ((A B) (A B)?)?" - ], - "rept 1 3 (A | B)": [ - [:rept, 1, 3, [:alt, :A, :B]], - "(A | B) ((A | B) (A | B)?)?" - ], - "star": [ - [:star, :A], - "A*" - ], - }.each do |title, (expr, result)| - it title do - expect(subject.send(:format_ebnf, expr)).to eql result + [:hex, "#x20"], + "#x20" + ], + "istr": [ + [:istr, "foo"], + %("foo") + ], + "opt": [ + [:opt, :A], + "A?" 
+ ], + "plus": [ + [:plus, :A], + "A+" + ], + "range": [ + [:range, "a-z"], + "[a-z]" + ], + "rept 0 1": [ + [:rept, 0, 1, :A], + "A?" + ], + "rept 0 *": [ + [:rept, 0, '*', :A], + "A*" + ], + "rept 1 1": [ + [:rept, 1, 1, :A], + "A" + ], + "rept 1 *": [ + [:rept, 1, '*', :A], + "A+" + ], + "rept 1 2": [ + [:rept, 1, 2, :A], + "A A?" + ], + "rept 1 3": [ + [:rept, 1, 3, :A], + "A (A A?)?" + ], + "rept 1 3 (A B)": [ + [:rept, 1, 3, [:seq, :A, :B]], + "(A B) ((A B) (A B)?)?" + ], + "rept 1 3 (A | B)": [ + [:rept, 1, 3, [:alt, :A, :B]], + "(A | B) ((A | B) (A | B)?)?" + ], + "star": [ + [:star, :A], + "A*" + ], + "n3 path": [ + [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], + %{pathItem (("!" path) | ("^" path))} + ], + }.each do |title, (expr, result)| + it title do + expect(subject.send(:format_ebnf, expr)).to eql result + end end end - end - context "Existing grammars" do - { - "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__), - "Turtle Grammar" => File.expand_path("../../etc/turtle.ebnf", __FILE__) - }.each do |name, file| - context name do - it "outputs grammar as text" do - expect {EBNF.parse(File.read(file)).to_s}.to_not raise_error - end - it "outputs grammar as html" do - expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error + context "Existing grammars" do + { + "ABNF Grammar" => File.expand_path("../../etc/abnf.ebnf", __FILE__), + "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__), + "ISO EBNF Grammar" => File.expand_path("../../etc/iso-ebnf.ebnf", __FILE__), + "Turtle Grammar" => File.expand_path("../../etc/turtle.ebnf", __FILE__), + }.each do |name, file| + context name do + it "outputs grammar as text" do + expect {EBNF.parse(File.read(file)).to_s}.to_not raise_error + end + it "outputs grammar as html" do + expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error + end end end end From e18acef541858c4e98681959f2d410c3b6b5c56e Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 
8 Jul 2020 16:19:14 -0700 Subject: [PATCH 32/50] Remember `@terminals` as a rule, and reuse when writing out the grammar. --- etc/ebnf.html | 4 ++++ examples/ebnf-ll1-parser/parser.rb | 6 +++--- examples/ebnf-peg-parser/parser.rb | 6 +++--- lib/ebnf/parser.rb | 6 +++--- lib/ebnf/rule.rb | 15 ++++++++------- lib/ebnf/writer.rb | 22 ++++++++++++++++++---- spec/ll1/data/parser.rb | 6 +++--- spec/peg/data/parser.rb | 6 +++--- 8 files changed, 45 insertions(+), 26 deletions(-) diff --git a/etc/ebnf.html b/etc/ebnf.html index 68faf9a..82a1cdb 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -61,6 +61,10 @@ ::= "@pass" expression + + @terminals + Productions for terminals + [11] LHS diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index f2c6ce5..2176c6c 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ -150,7 +150,7 @@ def inspect # data contains a declaration. # Invoke callback if data[:terminal] - callback.call(:terminal, data[:terminal]) + callback.call(:terminals, data[:terminal]) elsif data[:pass] callback.call(:pass, data[:pass]) end @@ -307,11 +307,11 @@ def initialize(input, **options, &block) **options ) do |context, *data| rule = case context - when :terminal + when :terminals # After parsing `@terminals` # This changes the state of the parser to treat subsequent rules as terminals. parsing_terminals = true - next + rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) when :pass # After parsing `@pass` # This defines a specific rule for whitespace. diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index 52c2b90..ce75ff8 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -127,7 +127,7 @@ class EBNFPegParser production(:declaration, clear_packrat: true) do |value, data, callback| # value contains a declaration. 
# Invoke callback - callback.call(:terminal) if value == '@terminals' + callback.call(:terminals) if value == '@terminals' nil end @@ -303,11 +303,11 @@ def initialize(input, **options, &block) **options ) do |context, *data| rule = case context - when :terminal + when :terminals # After parsing `@terminals` # This changes the state of the parser to treat subsequent rules as terminals. parsing_terminals = true - next + rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) when :pass # After parsing `@pass` # This defines a specific rule for whitespace. diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index 62d4460..78e5b31 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -121,7 +121,7 @@ class Parser production(:declaration, clear_packrat: true) do |value, data, callback| # value contains a declaration. # Invoke callback - callback.call(:terminal) if value == '@terminals' + callback.call(:terminals) if value == '@terminals' nil end @@ -297,11 +297,11 @@ def initialize(input, **options, &block) **options ) do |context, *data| rule = case context - when :terminal + when :terminals # After parsing `@terminals` # This changes the state of the parser to treat subsequent rules as terminals. parsing_terminals = true - next + rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) when :pass # After parsing `@pass` # This defines a specific rule for whitespace. 
diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index 3b8c37a..efa35dd 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -44,7 +44,7 @@ class Rule # Kind of rule # - # @return [:rule, :terminal, or :pass] + # @return [:rule, :terminal, :terminals, or :pass] attr_accessor :kind # Rule expression @@ -76,7 +76,7 @@ class Rule attr_accessor :cleanup # @param [Symbol, nil] sym - # `nil` is allowed only for @pass + # `nil` is allowed only for @pass or @terminals # @param [Integer, nil] id # @param [Array] expr # The expression is an internal-representation of an S-Expression with one of the following oparators: @@ -91,7 +91,7 @@ class Rule # * `rept m n` – A sequence of at lest `m` and at most `n` of the matching rule. It will always return an array. # * `seq` – A sequence of rules or terminals. If any (other than `opt` or `star`) to not parse, the rule is terminated as unmatched. # * `star` – A sequence of zero or more of the matching rule. It will always return an array. - # @param [:rule, :terminal, :pass, ] kind (nil) + # @param [:rule, :terminal, :terminals, :pass] kind (nil) # @param [String] ebnf (nil) # When parsing, records the EBNF string used to create the rule. # @param [Array] first (nil) @@ -106,7 +106,7 @@ class Rule # Records information useful for cleaning up converted :plus, and :star expansions (LL(1)). def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, start: nil, top_rule: nil, cleanup: nil) @sym, @id = sym, id - @expr = expr.is_a?(Array) ? expr : [:seq, expr] + @expr = expr.is_a?(Array) ? 
expr : [:seq, expr].compact @ebnf, @kind, @first, @follow, @start, @cleanup, @top_rule = ebnf, kind, first, follow, start, cleanup, top_rule @top_rule ||= self @kind ||= case @@ -115,13 +115,14 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta else :rule end - # Allow @pass to not be named + # Allow @pass and @terminals to not be named @sym ||= :_pass if @kind == :pass + @sym ||= :_terminals if @kind == :terminals raise ArgumentError, "Rule sym must be a symbol, was #{@sym.inspect}" unless @sym.is_a?(Symbol) raise ArgumentError, "Rule id must be a string or nil, was #{@id.inspect}" unless (@id || "").is_a?(String) - raise ArgumentError, "Rule kind must be one of :rule, :terminal, or :pass, was #{@kind.inspect}" unless - @kind.is_a?(Symbol) && %w(rule terminal pass).map(&:to_sym).include?(@kind) + raise ArgumentError, "Rule kind must be one of :rule, :terminal, :terminals, or :pass, was #{@kind.inspect}" unless + @kind.is_a?(Symbol) && %w(rule terminal terminals pass).map(&:to_sym).include?(@kind) case @expr.first when :alt diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 9a85909..4266c8e 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -122,9 +122,18 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) require 'erubis' eruby = Erubis::Eruby.new(ERB_DESC) formatted_rules = rules.map do |rule| - formatted_expr = self.send(format_meth, rule.expr) - formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr - OpenStruct.new(id: rule.id, sym: rule.sym, pass: rule.pass?, formatted: formatted_expr) + if rule.kind == :terminals + formatted_expr = "Productions for terminals" + formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr + else + formatted_expr = self.send(format_meth, rule.expr) + formatted_expr.length > rhs_length ? 
self.send(format_meth, rule.expr, sep: "\n") : formatted_expr + end + OpenStruct.new(id: rule.id, + sym: rule.sym, + pass: rule.pass?, + terminals: (rule.kind == :terminals), + formatted: formatted_expr) end out.write eruby.evaluate(format: format, rules: formatted_rules) return @@ -136,7 +145,9 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) # Format each rule, considering the available rhs size rules.each do |rule| buffer = if rule.pass? - "%-#{lhs_length-2}s" % "@pass" + "\n%-#{lhs_length-2}s" % "@pass" + elsif rule.kind == :terminals + "\n%-#{lhs_length-2}s" % "@terminals" else lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym} end @@ -147,6 +158,7 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) else buffer << formatted_expr end + buffer << "\n\n" if rule.kind == :terminals out.puts(buffer) end end @@ -597,6 +609,8 @@ def format_isoebnf_range(string) <% if rule.pass %> @pass + <% elsif rule.terminals %> + @terminals <% else %> <% if rule.id %> [<%==rule.id%>] diff --git a/spec/ll1/data/parser.rb b/spec/ll1/data/parser.rb index 954eccb..29e166d 100644 --- a/spec/ll1/data/parser.rb +++ b/spec/ll1/data/parser.rb @@ -69,7 +69,7 @@ class EBNFParser production(:declaration) do |input, current, callback| # current contains a declaration. 
# Invoke callback - callback.call(:terminal) if current[:terminal] == '@terminals' + callback.call(:terminals) if current[:terminal] == '@terminals' end production(:rule) do |input, current, callback| @@ -210,9 +210,9 @@ def initialize(input, **options, &block) **options ) do |context, *data| rule = case context - when :terminal + when :terminals parsing_terminals = true - next + rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) when :pass rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass) when :rule diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index 0cb8a99..687cdb8 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -60,7 +60,7 @@ class EBNFPegParser production(:declaration, clear_packrat: true) do |value, data, callback| # current contains a declaration. # Invoke callback - callback.call(:terminal) if value == '@terminals' + callback.call(:terminals) if value == '@terminals' end start_production(:rule, as_hash: true) @@ -151,9 +151,9 @@ def initialize(input, **options, &block) **options ) do |context, *data| rule = case context - when :terminal + when :terminals parsing_terminals = true - next + rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals) when :pass rule = EBNF::Rule.new(nil, nil, data.first, kind: :pass) when :rule From c615e79b845c4d0c5456f350f20471034362eebf Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 8 Jul 2020 17:05:04 -0700 Subject: [PATCH 33/50] Better support for annotating hex escapes. 
--- lib/ebnf/writer.rb | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 4266c8e..c217ef1 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -44,7 +44,7 @@ class Writer "record separator", #x1E "unit separator", #x1F "space" #x20 - ].freeze + ] ## # Format rules to a String @@ -178,9 +178,9 @@ def format_ebnf(expr, sep: nil, embedded: false) elsif expr =~ /\A#x\h+/ return format_ebnf_hex(expr[2..-1].hex.chr) elsif expr =~ /"/ - return (@options[:html] ? %('#{escape_ebnf(expr, "'")}') : %('#{escape_ebnf(expr, "'")}')) + return escape_ebnf(expr, "'") else - return (@options[:html] ? %("#{escape_ebnf(expr, '"')}") : %("#{escape_ebnf(expr, '"')}")) + return escape_ebnf(expr, '"') end end parts = { @@ -206,7 +206,7 @@ def format_ebnf(expr, sep: nil, embedded: false) r = format_ebnf(expr[1], embedded: true) "#{r}#{char}" when :hex - (@options[:html] ? %(#{expr.last}) : expr.last) + escape_ebnf_hex(expr.last[2..-1].hex.chr(Encoding::UTF_8)) when :range format_ebnf_range(expr.last) when :seq @@ -287,12 +287,17 @@ def escape_ebnf(string, quote = '"') buffer = "" string.each_char do |c| buffer << case (u = c.ord) - when (0x00..0x1f) then "#x%02X" % u - when quote.ord then "#x%02X" % u + when 0x00..0x20 then escape_ebnf_hex(c) + when quote.ord then escape_ebnf_hex(c) + when 0x7F then escape_ebnf_hex(c) else c end end - buffer + if @options[:html] + %('#{buffer}') + else + buffer + end end def escape_ebnf_hex(u) @@ -306,8 +311,14 @@ def escape_ebnf_hex(u) if @options[:html] if u.ord <= 0x20 char = %(#{char}) + elsif u.ord <= 0x7F + char = %(#{char}) elsif u.ord == 0x7F char = %(#{char}) + elsif u.ord <= 0xFF + char = %(#{char}) + else + char = %(#{char}) end %(#{char}) else @@ -371,8 +382,7 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) r = format_abnf(expr[1], embedded: true) "#{char}#{r}" when :hex - hex = expr.last.sub('#', '%') - 
(@options[:html] ? %(#{hex}) : hex) + escape_abnf_hex(expr.last[2..-1].hex.chr) when :range format_abnf_range(expr.last) when :seq @@ -446,10 +456,14 @@ def escape_abnf_hex(u) if @options[:html] if u.ord <= 0x20 char = %(#{char}) + elsif u.ord <= 0x7F + char = %(#{char}) elsif u.ord == 0x7F char = %(#{char}) + elsif u.ord <= 0xFF + char = %(#{char}) else - char = %(#{char}) + char = %(#{char}) end %(#{char}) else From c81d38124670cd94487ee10856813fac7f18225c Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 8 Jul 2020 17:17:50 -0700 Subject: [PATCH 34/50] Fix string quoting in writer. --- etc/abnf.sxp | 1 + etc/ebnf.html | 16 ++++++++-------- etc/ebnf.ll1.sxp | 1 + etc/ebnf.peg.rb | 1 + etc/ebnf.peg.sxp | 1 + etc/ebnf.sxp | 1 + etc/iso-ebnf.sxp | 1 + etc/sparql.sxp | 1 + etc/turtle.sxp | 1 + lib/ebnf/writer.rb | 4 ++-- spec/base_spec.rb | 3 ++- spec/ebnf_spec.rb | 3 ++- spec/writer_spec.rb | 2 +- 13 files changed, 23 insertions(+), 13 deletions(-) diff --git a/etc/abnf.sxp b/etc/abnf.sxp index 30c0aa7..df0b99b 100644 --- a/etc/abnf.sxp +++ b/etc/abnf.sxp @@ -14,6 +14,7 @@ (rule case_insensitive_string (seq (opt "%i") quoted_string)) (rule case_sensitive_string (seq "%s" quoted_string)) (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminals _terminals (seq)) (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) (terminal c_wsp (alt WSP (seq c_nl WSP))) diff --git a/etc/ebnf.html b/etc/ebnf.html index 82a1cdb..5e6c454 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -11,7 +11,7 @@ [2] declaration ::= - "@terminals" | pass + '@terminals' | pass [3] @@ -59,7 +59,7 @@ [10] pass ::= - "@pass" expression + '@pass' expression @terminals @@ -69,7 +69,7 @@ [11] LHS ::= - ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" + ( "[" SYMBOL "]" #x20+ ) ? 
SYMBOL #x20* '::=' [12] @@ -81,7 +81,7 @@ [13] HEX ::= - "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + + '#x' ( [ a-f] | [ A-F] | [ 0-9] ) + [14] @@ -93,7 +93,7 @@ [15] O_ENUM ::= - "[^" ( R_CHAR+ | HEX+ ) "-"? "]" + '[^' ( R_CHAR+ | HEX+ ) "-"? "]" [16] @@ -105,7 +105,7 @@ [17] O_RANGE ::= - "[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" + '[^' ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" [18] @@ -123,7 +123,7 @@ [20] CHAR ::= - [ #x09#x0A#x0D] | [ #x20-#xD7FF] | [ #xE000-#xFFFD] | [ #x00010000-#x0010FFFF] + [ #x09#x0A#x0D] | [ #x20-#xD7FF] | [ #xE000-#xFFFD] | [ #x00010000-#x0010FFFF] [21] @@ -141,7 +141,7 @@ [23] PASS ::= - ( [ #x09#x0A#x0D#x20] | ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) | ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") | ( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)") ) + + ( [ #x09#x0A#x0D#x20] | ( ( ( "#" - '#x') | '//') [ ^#x0A#x0D] * ) | ( '/*' ( ( "*" [ ^/] ) ? | [ ^*] ) * '*/') | ( '(*' ( ( "*" [ ^)] ) ? | [ ^*] ) * '*)') ) + @pass diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 008eeb3..7efef66 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -98,6 +98,7 @@ (first "@pass") (follow "@pass" "@terminals" LHS _eof) (seq "@pass" expression)) + (terminals _terminals (seq)) (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index d5785b2..0e10a23 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -19,6 +19,7 @@ module Meta EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: 
:terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 87f6147..b9c6eb3 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -17,6 +17,7 @@ (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (seq "(" expression ")")) (rule pass "10" (seq "@pass" expression)) + (terminals _terminals (seq)) (terminal LHS "11" (seq _LHS_1 SYMBOL _LHS_2 "::=")) (terminal _LHS_1 "11.1" (opt _LHS_3)) (terminal _LHS_3 "11.3" (seq "[" SYMBOL "]" _LHS_4)) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index a806ef1..647c9c9 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -10,6 +10,7 @@ (rule primary "9" (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) + (terminals _terminals (seq)) (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index ebe6127..9aff3d5 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -16,6 +16,7 @@ (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) + (terminals _terminals (seq)) (terminal terminal_string (alt (seq "'" (plus first_terminal_character) "'") diff --git a/etc/sparql.sxp b/etc/sparql.sxp index 414d742..9fffee2 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -282,6 +282,7 @@ (rule iri "136" (alt IRIREF PrefixedName)) (rule PrefixedName "137" (alt 
PNAME_LN PNAME_NS)) (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON)) + (terminals _terminals (seq)) (terminal IRIREF "139" (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">")) (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":")) diff --git a/etc/turtle.sxp b/etc/turtle.sxp index d095e2f..720c758 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -29,6 +29,7 @@ (rule iri "135s" (alt IRIREF PrefixedName)) (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS)) (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON)) + (terminals _terminals (seq)) (terminal IRIREF "18" (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">")) (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":")) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index c217ef1..9d07b59 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -178,9 +178,9 @@ def format_ebnf(expr, sep: nil, embedded: false) elsif expr =~ /\A#x\h+/ return format_ebnf_hex(expr[2..-1].hex.chr) elsif expr =~ /"/ - return escape_ebnf(expr, "'") + return "'" + escape_ebnf(expr, "'") + "'" else - return escape_ebnf(expr, '"') + return '"' + escape_ebnf(expr, '"') + '"' end end parts = { diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 2e5b6e7..6735ce6 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -15,7 +15,8 @@ %{ @terminals [3] terminal ::= [A-Z]+ - } => %{((terminal terminal "3" (plus (range "A-Z"))))}, + } => %{((terminals _terminals (seq)) + (terminal terminal "3" (plus (range "A-Z"))))}, %{ [9] primary ::= HEX | RANGE diff --git a/spec/ebnf_spec.rb b/spec/ebnf_spec.rb index e5d1877..bb85579 100644 --- a/spec/ebnf_spec.rb +++ b/spec/ebnf_spec.rb @@ -12,7 +12,8 @@ %{ @terminals [3] terminal ::= [A-Z]+ - } => %{((terminal terminal "3" (plus (range "A-Z"))))}, + } => %{((terminals _terminals (seq)) + (terminal terminal "3" (plus (range "A-Z"))))}, %{ [9] primary ::= HEX | RANGE diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index d216a34..8635cd6 100644 --- 
a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -127,7 +127,7 @@ [:range, "abc-"], "[abc-]" ], - "hex": [ + "hex": [ [:hex, "#x20"], "#x20" ], From ebb893f7397f4ee2899ccaf08562592ce5629b82 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 8 Jul 2020 17:19:57 -0700 Subject: [PATCH 35/50] Redux. --- etc/ebnf.html | 14 +++++++------- lib/ebnf/writer.rb | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/etc/ebnf.html b/etc/ebnf.html index 5e6c454..cee124d 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -11,7 +11,7 @@ [2] declaration ::= - '@terminals' | pass + "@terminals" | pass [3] @@ -59,7 +59,7 @@ [10] pass ::= - '@pass' expression + "@pass" expression @terminals @@ -69,7 +69,7 @@ [11] LHS ::= - ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* '::=' + ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" [12] @@ -81,7 +81,7 @@ [13] HEX ::= - '#x' ( [ a-f] | [ A-F] | [ 0-9] ) + + "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + [14] @@ -93,7 +93,7 @@ [15] O_ENUM ::= - '[^' ( R_CHAR+ | HEX+ ) "-"? "]" + "[^" ( R_CHAR+ | HEX+ ) "-"? "]" [16] @@ -105,7 +105,7 @@ [17] O_RANGE ::= - '[^' ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" + "[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" [18] @@ -141,7 +141,7 @@ [23] PASS ::= - ( [ #x09#x0A#x0D#x20] | ( ( ( "#" - '#x') | '//') [ ^#x0A#x0D] * ) | ( '/*' ( ( "*" [ ^/] ) ? | [ ^*] ) * '*/') | ( '(*' ( ( "*" [ ^)] ) ? | [ ^*] ) * '*)') ) + + ( [ #x09#x0A#x0D#x20] | ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) | ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") | ( "(*" ( ( "*" [ ^)] ) ? 
| [ ^*] ) * "*)") ) + @pass diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 9d07b59..ee6d6b5 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -294,7 +294,7 @@ def escape_ebnf(string, quote = '"') end end if @options[:html] - %('#{buffer}') + %(#{buffer}) else buffer end From ecac5a90c432eda80a2cbc2225a07ad2b2b976b7 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 10 Jul 2020 12:35:27 -0700 Subject: [PATCH 36/50] Add missing block to ll1 parser debug. --- lib/ebnf/ll1/parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ebnf/ll1/parser.rb b/lib/ebnf/ll1/parser.rb index 234f205..e8f5159 100644 --- a/lib/ebnf/ll1/parser.rb +++ b/lib/ebnf/ll1/parser.rb @@ -576,7 +576,7 @@ def progress(node, *args, &block) # @option options [Integer] :depth # Recursion depth for indenting output # @yieldreturn [String] additional string appended to `message`. - def debug(*args) + def debug(*args, &block) return unless @options[:logger] options = args.last.is_a?(Hash) ? args.pop : {} lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno)) From 254aa398be6527c76a42fbdc8ab9b7f75e9cc95c Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 10 Jul 2020 16:31:33 -0700 Subject: [PATCH 37/50] Writer improvements. Build lib/ebnf/ebnf/meta.rb by copying from /etc. 
--- Rakefile | 29 +- etc/ebnf.ebnf | 9 +- etc/ebnf.html | 380 +++++++++++++-------- etc/ebnf.ll1.sxp | 11 +- etc/ebnf.peg.rb | 43 ++- etc/ebnf.peg.sxp | 43 ++- etc/ebnf.sxp | 11 +- etc/iso-ebnf.isoebnf | 4 +- examples/isoebnf/examples/iso-ebnf.isoebnf | 4 +- lib/ebnf/isoebnf/meta.rb | 5 +- lib/ebnf/writer.rb | 103 ++++-- 11 files changed, 385 insertions(+), 257 deletions(-) diff --git a/Rakefile b/Rakefile index fdf3050..5ee4416 100755 --- a/Rakefile +++ b/Rakefile @@ -56,21 +56,26 @@ namespace :etc do task build: ETC_FILES end -desc "Build meta files for ISO EBNF and ABNF" -task :meta => %w{lib/ebnf/abnf/meta.rb lib/ebnf/abnf/core.rb lib/ebnf/isoebnf/meta.rb} do - file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) - end +desc "Build meta files for ABNF, EBNF and ISO EBNF" +task :meta => %w{lib/ebnf/ebnf/meta.rb lib/ebnf/isoebnf/meta.rb lib/ebnf/abnf/meta.rb lib/ebnf/abnf/core.rb} - file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) - end +file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ABNFMeta -o lib/ebnf/abnf/meta.rb etc/abnf.ebnf) +end - file "lib/ebnf/isoebnf/meta.rb" => "etc/iso-ebnf.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) - end +file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) end +file "lib/ebnf/ebnf/meta.rb" => "etc/ebnf.peg.rb" do + %x(cp etc/ebnf.peg.rb lib/ebnf/ebnf/meta.rb) +end + +file "lib/ebnf/isoebnf/meta.rb" => "etc/iso-ebnf.ebnf" do + %x(bin/ebnf --peg -f rb --mod-name ISOEBNFMeta -o lib/ebnf/isoebnf/meta.rb etc/iso-ebnf.ebnf) +end + + # Build SXP output with leading space to allow for Markdown formatting. 
rule ".sxp" => %w{.ebnf} do |t| puts "build #{t.name}" @@ -106,7 +111,7 @@ end file "etc/ebnf.peg.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --peg -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) + %x(bin/ebnf --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) end file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index b9e7554..fefafa8 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -58,10 +58,9 @@ [22] POSTFIX ::= [?*+] # Ignore all whitespace and comments between non-terminals - [23] PASS ::= ( [#x9#xA#xD#x20] - | ( ('#' - '#x') | '//' ) [^#xA#xD]* - | '/*' (( '*' [^/] )? | [^*] )* '*/' - | '(*' (( '*' [^)] )? | [^*] )* '*)' - )+ + [23] PASS ::= [#x9#xA#xD#x20] + | ( ('#' - '#x') | '//' ) [^#xA#xD]* + | '/*' (( '*' [^/] )? | [^*] )* '*/' + | '(*' (( '*' [^)] )? | [^*] )* '*)' @pass PASS diff --git a/etc/ebnf.html b/etc/ebnf.html index cee124d..4dd8b1e 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -1,152 +1,240 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[1]ebnf::=( declaration | rule) *
[2]declaration::="@terminals" | pass
[3]rule::=LHS expression
[4]expression::=alt
[5]alt::=seq ( "|" seq) *
[6]seq::=diff+
[7]diff::=postfix ( "-" postfix) ?
[8]postfix::=primary POSTFIX?
[9]primary::=HEX | SYMBOL | ENUM | O_ENUM | RANGE | O_RANGE | STRING1 | STRING2 | ( "(" expression ")")
[10]pass::="@pass" expression
@terminalsProductions for terminals
[11]LHS::=( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::="
[12]SYMBOL::=( [ a-z] | [ A-Z] | [ 0-9] | "_" | ".") +
[13]HEX::="#x" ( [ a-f] | [ A-F] | [ 0-9] ) +
[14]ENUM::=( "[" ( R_CHAR+ | HEX+ ) "-"? "]") - LHS
[15]O_ENUM::="[^" ( R_CHAR+ | HEX+ ) "-"? "]"
[16]RANGE::="[" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[17]O_RANGE::="[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[18]STRING1::='"' ( CHAR - '"') * '"'
[19]STRING2::="'" ( CHAR - "'") * "'"
[20]CHAR::=[ #x09#x0A#x0D] | [ #x20-#xD7FF] | [ #xE000-#xFFFD] | [ #x00010000-#x0010FFFF]
[21]R_CHAR::=CHAR - ( "]" | "-")
[22]POSTFIX::=[ ?*+]
[23]PASS::=( [ #x09#x0A#x0D#x20] | ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) | ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") | ( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)") ) +
@passPASS
[1]ebnf::=( declaration | rule) *
[2]declaration::="@terminals" | pass
[3]rule::=LHS expression
[4]expression::=alt
[5]alt::=seq ( "|" seq) *
[6]seq::=diff+
[7]diff::=postfix ( "-" postfix) ?
[8]postfix::=primary POSTFIX?
[9]primary::=HEX
[9]|SYMBOL
[9]|ENUM
[9]|O_ENUM
[9]|RANGE
[9]|O_RANGE
[9]|STRING1
[9]|STRING2
[9]|( "(" expression ")")
[10]pass::="@pass" expression
@terminalsProductions for terminals
[11]LHS::=( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::="
[12]SYMBOL::=( [ a-z] | [ A-Z] | [ 0-9] | "_" | ".") +
[13]HEX::="#x" ( [ a-f] | [ A-F] | [ 0-9] ) +
[14]ENUM::=( "[" ( R_CHAR+ | HEX+ ) "-"? "]") - LHS
[15]O_ENUM::="[^" ( R_CHAR+ | HEX+ ) "-"? "]"
[16]RANGE::="[" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[17]O_RANGE::="[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]"
[18]STRING1::='"' ( CHAR - '"') * '"'
[19]STRING2::="'" ( CHAR - "'") * "'"
[20]CHAR::=[ #x09#x0A#x0D]
[20]|[ #x20-#xD7FF]
[20]|[ #xE000-#xFFFD]
[20]|[ #x00010000-#x0010FFFF]
[21]R_CHAR::=CHAR - ( "]" | "-")
[22]POSTFIX::=[ ?*+]
[23]PASS::=[ #x09#x0A#x0D#x20]
[23]|( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * )
[23]|( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/")
[23]|( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)")
@pass
diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 7efef66..2f629e4 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -118,12 +118,11 @@ (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" - (plus - (alt - (range "#x9#xA#xD#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) - (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") - (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) + (alt + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) + (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") + (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) ) (pass _pass (seq PASS)) (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf)) (rule _rule_1 "3.1" diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index 0e10a23..fc03759 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -71,28 +71,27 @@ module Meta EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_R_CHAR_1, "21.1", [:alt, "]", "-"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_12, "23.12", [:opt, :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_14, "23.14", [:seq, "*", :_PASS_15], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_15, "23.15", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_13, "23.13", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_5, "23.5", [:seq, "(*", :_PASS_16, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_16, "23.16", [:star, :_PASS_17], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_17, "23.17", [:alt, :_PASS_18, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_18, "23.18", [:opt, :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:PASS, "23", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_1, "23.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "23.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_5, "23.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:_PASS_7, "23.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_6, "23.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_8, "23.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_3, "23.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "23.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_10, "23.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_11, "23.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_13, "23.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_14, "23.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_12, "23.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_15, "23.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_16, "23.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_17, "23.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_19, "23.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_20, "23.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_18, "23.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), ] end diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index b9c6eb3..c79f0d2 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -69,26 +69,25 @@ (terminal R_CHAR "21" (diff CHAR _R_CHAR_1)) (terminal _R_CHAR_1 
"21.1" (alt "]" "-")) (terminal POSTFIX "22" (range "?*+")) - (terminal PASS "23" (plus _PASS_1)) - (terminal _PASS_1 "23.1" (alt _PASS_2 _PASS_3 _PASS_4 _PASS_5)) - (terminal _PASS_2 "23.2" (range "#x9#xA#xD#x20")) - (terminal _PASS_3 "23.3" (seq _PASS_6 _PASS_7)) - (terminal _PASS_6 "23.6" (alt _PASS_8 "//")) - (terminal _PASS_8 "23.8" (diff "#" "#x")) - (terminal _PASS_7 "23.7" (star _PASS_9)) - (terminal _PASS_9 "23.9" (range "^#xA#xD")) - (terminal _PASS_4 "23.4" (seq "/*" _PASS_10 "*/")) - (terminal _PASS_10 "23.10" (star _PASS_11)) - (terminal _PASS_11 "23.11" (alt _PASS_12 _PASS_13)) - (terminal _PASS_12 "23.12" (opt _PASS_14)) - (terminal _PASS_14 "23.14" (seq "*" _PASS_15)) - (terminal _PASS_15 "23.15" (range "^/")) - (terminal _PASS_13 "23.13" (range "^*")) - (terminal _PASS_5 "23.5" (seq "(*" _PASS_16 "*)")) - (terminal _PASS_16 "23.16" (star _PASS_17)) - (terminal _PASS_17 "23.17" (alt _PASS_18 _PASS_19)) - (terminal _PASS_18 "23.18" (opt _PASS_20)) - (terminal _PASS_20 "23.20" (seq "*" _PASS_21)) - (terminal _PASS_21 "23.21" (range "^)")) - (terminal _PASS_19 "23.19" (range "^*")) + (terminal PASS "23" (alt _PASS_1 _PASS_2 _PASS_3 _PASS_4)) + (terminal _PASS_1 "23.1" (range "#x9#xA#xD#x20")) + (terminal _PASS_2 "23.2" (seq _PASS_5 _PASS_6)) + (terminal _PASS_5 "23.5" (alt _PASS_7 "//")) + (terminal _PASS_7 "23.7" (diff "#" "#x")) + (terminal _PASS_6 "23.6" (star _PASS_8)) + (terminal _PASS_8 "23.8" (range "^#xA#xD")) + (terminal _PASS_3 "23.3" (seq "/*" _PASS_9 "*/")) + (terminal _PASS_9 "23.9" (star _PASS_10)) + (terminal _PASS_10 "23.10" (alt _PASS_11 _PASS_12)) + (terminal _PASS_11 "23.11" (opt _PASS_13)) + (terminal _PASS_13 "23.13" (seq "*" _PASS_14)) + (terminal _PASS_14 "23.14" (range "^/")) + (terminal _PASS_12 "23.12" (range "^*")) + (terminal _PASS_4 "23.4" (seq "(*" _PASS_15 "*)")) + (terminal _PASS_15 "23.15" (star _PASS_16)) + (terminal _PASS_16 "23.16" (alt _PASS_17 _PASS_18)) + (terminal _PASS_17 "23.17" (opt _PASS_19)) + (terminal 
_PASS_19 "23.19" (seq "*" _PASS_20)) + (terminal _PASS_20 "23.20" (range "^)")) + (terminal _PASS_18 "23.18" (range "^*")) (pass _pass (seq PASS))) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 647c9c9..e545b50 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -30,10 +30,9 @@ (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) (terminal POSTFIX "22" (range "?*+")) (terminal PASS "23" - (plus - (alt - (range "#x9#xA#xD#x20") - (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) - (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") - (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) )) + (alt + (range "#x9#xA#xD#x20") + (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) + (seq "/*" (star (alt (opt (seq "*" (range "^/"))) (range "^*"))) "*/") + (seq "(*" (star (alt (opt (seq "*" (range "^)"))) (range "^*"))) "*)")) ) (pass _pass (seq PASS))) diff --git a/etc/iso-ebnf.isoebnf b/etc/iso-ebnf.isoebnf index 90084f1..8bcda08 100644 --- a/etc/iso-ebnf.isoebnf +++ b/etc/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols ’_’ or "_" *); + between the quote symbols '_' or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. 
*); -comment = ’(*’, {comment_symbol}, ’*)’ +comment = '(*', {comment_symbol}, '*)' (* A comment is allowed anywhere outside a , , or *); diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf index 90084f1..8bcda08 100644 --- a/examples/isoebnf/examples/iso-ebnf.isoebnf +++ b/examples/isoebnf/examples/iso-ebnf.isoebnf @@ -47,7 +47,7 @@ grouped_sequence = '(', definitions_list, ')' terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") | ('"', second_terminal_character, {second_terminal_character}, '"') (* A represents the - between the quote symbols ’_’ or "_" *); + between the quote symbols '_' or "_" *); meta_identifier = letter, {meta_identifier_character} (* A is the name of a syntactic element of the language being defined *); @@ -57,7 +57,7 @@ integer = decimal_digit, {decimal_digit} ; special_sequence = '?', {special_sequence_character}, '?' (* The meaning of a is not defined in the standard metalanguage. *); -comment = ’(*’, {comment_symbol}, ’*)’ +comment = '(*', {comment_symbol}, '*)' (* A comment is allowed anywhere outside a , , or *); diff --git a/lib/ebnf/isoebnf/meta.rb b/lib/ebnf/isoebnf/meta.rb index 3c943c6..6a88b03 100644 --- a/lib/ebnf/isoebnf/meta.rb +++ b/lib/ebnf/isoebnf/meta.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from iso-ebnf.ebnf +# Derived from etc/iso-ebnf.ebnf module ISOEBNFMeta RULES = [ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), @@ -21,6 +21,7 @@ module ISOEBNFMeta EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: 
:terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), @@ -46,7 +47,7 @@ module ISOEBNFMeta EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:empty, nil, [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index ee6d6b5..08d3f13 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -109,7 +109,11 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length lhs_length = max_sym + 1 - lhs_fmt = "%-#{max_sym}s #{format == :ebnf ? 
'::=' : '='} " + lhs_fmt = case format + when :abnf then "%-#{max_sym}s = " + when :ebnf then "%-#{max_sym}s ::= " + when :isoebnf then "%-#{max_sym}s = " + end if format == :ebnf && max_id > 0 lhs_fmt = "%-#{max_id+2}s " + lhs_fmt lhs_length += max_id + 3 @@ -122,19 +126,48 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) require 'erubis' eruby = Erubis::Eruby.new(ERB_DESC) formatted_rules = rules.map do |rule| - if rule.kind == :terminals - formatted_expr = "Productions for terminals" - formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr + if rule.kind == :terminals || rule.kind == :pass + OpenStruct.new(id: ("@#{rule.kind}"), + sym: nil, + assign: nil, + formatted: ("Productions for terminals" if rule.kind == :terminals)) else formatted_expr = self.send(format_meth, rule.expr) - formatted_expr.length > rhs_length ? self.send(format_meth, rule.expr, sep: "\n") : formatted_expr + # Measure text without markup + formatted_expr_text = formatted_expr.gsub(%r{]*>}, '') + if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?) + lines = [] + # Can only reasonably split apart alts + self.send(format_meth, rule.expr, sep: "--rule-extensions--"). + split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx| + assign = case format + when :ebnf + formatted.sub!(%r{\s*\|\s*}, '') + (ndx > 0 ? (rule.alt? ? '|' : '') : '::=') + when :abnf + formatted.sub!(%r{\s*/\s*}, '') + (ndx > 0 ? '=/' : '=') + else + formatted.sub!(%r{\s*\|\s*}, '') + (ndx > 0 ? (rule.alt? ? '|' : '') : '=') + end + lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id), + sym: (rule.sym if ndx == 0 || format == :abnf), + assign: assign, + formatted: formatted) + end + if format == :isoebnf + lines << OpenStruct.new(assign: ';') + end + lines + else + OpenStruct.new(id: ("[#{rule.id}]" if rule.id), + sym: rule.sym, + assign: (format == :ebnf ? 
'::=' : '='), + formatted: (formatted_expr + (format == :isoebnf ? ' ;' : ''))) + end end - OpenStruct.new(id: rule.id, - sym: rule.sym, - pass: rule.pass?, - terminals: (rule.kind == :terminals), - formatted: formatted_expr) - end + end.flatten out.write eruby.evaluate(format: format, rules: formatted_rules) return rescue LoadError @@ -145,20 +178,32 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) # Format each rule, considering the available rhs size rules.each do |rule| buffer = if rule.pass? - "\n%-#{lhs_length-2}s" % "@pass" + "\n%-#{lhs_length-2}s " % "@pass" elsif rule.kind == :terminals "\n%-#{lhs_length-2}s" % "@terminals" else lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym} end formatted_expr = self.send(format_meth, rule.expr) - if formatted_expr.length > rhs_length - # Space out past "= " - buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (format == :ebnf ? 4 : 2)))) + if formatted_expr.length > rhs_length && (format != :abnf || rule.alt?) + if format == :abnf + # No whitespace, use =/ + self.send(format_meth, rule.expr, sep: "--rule-extensions--"). + split(/\s*--rule-extensions--\s*/).each_with_index do |formatted, ndx| + if ndx > 0 + buffer << "\n" + lhs_fmt.sub('= ', '=/') % {id: "[#{rule.id}]", sym: rule.sym} + end + buffer << formatted.sub(/\s*\/\s*/, '') + end + else + # Space out past "= " + buffer << self.send(format_meth, rule.expr, sep: ("\n" + " " * (lhs_length + (rule.alt? ? 2 : 4) - (format == :ebnf ? 0 : 2)))) + buffer << ("\n" + " " * (lhs_length) + ';') if format == :isoebnf + end else - buffer << formatted_expr + buffer << formatted_expr + (format == :isoebnf ? ' ;' : '') end - buffer << "\n\n" if rule.kind == :terminals + buffer << "\n\n" if [:terminals, :pass].include?(rule.kind) out.puts(buffer) end end @@ -353,7 +398,7 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) end end parts = { - alt: (@options[:html] ? "| " : "| "), + alt: (@options[:html] ? 
"/ " : "/ "), star: (@options[:html] ? "* " : "*"), plus: (@options[:html] ? "+ " : "1*"), opt: (@options[:html] ? "? " : "?") @@ -620,20 +665,14 @@ def format_isoebnf_range(string) <% for rule in @rules %> - - <% if rule.pass %> - - <% elsif rule.terminals %> - - <% else %> - <% if rule.id %> - - <% end %> - - - <% end %> - - + > + <% if rule.id %> + + <% end %> + + + + <% end %>
@pass@terminals[<%==rule.id%>]<%== rule.sym %><%= @format == :ebnf ? '::=' : '='%><%= rule.formatted %>
<%= rule.id %><%== rule.sym %><%= rule.assign %><%= rule.formatted %>
From 6511967ca6d6190b8ba85a96f493344b42c20605 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Fri, 10 Jul 2020 17:28:25 -0700 Subject: [PATCH 38/50] Allow ranges and enums to be mixed with R_CHAR and HEX, as long as they are in appropriate groups. --- README.md | 8 ++++---- lib/ebnf/rule.rb | 18 +++++++----------- spec/rule_spec.rb | 23 +++++++++++++++-------- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 85bc45a..7e9e8fd 100644 --- a/README.md +++ b/README.md @@ -119,13 +119,13 @@ Within the expression on the right-hand side of a rule, the following expression #xN where N is a hexadecimal integer, the expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant. [a-zA-Z], [#xN-#xN] - matches any Char with a value in the range(s) indicated (inclusive). + matches any Char or HEX with a value in the range(s) indicated (inclusive). [abc], [#xN#xN#xN] - matches any Char with a value among the characters enumerated. Enumerations and ranges can be mixed in one set of brackets. + matches any UTF-8 Char or HEX with a value among the characters enumerated. The last component may be '-'. Enumerations and ranges may be mixed in one set of brackets. [^a-z], [^#xN-#xN] - matches any UTF-8 Char with a value outside the range indicated. + matches any UTF-8 Char or HEX a value outside the range indicated. [^abc], [^#xN#xN#xN] - matches any UTF-8 Char with a value not among the characters given. Enumerations and ranges of forbidden values can be mixed in one set of brackets. + matches any UTF-8 Char or HEX with a value not among the characters given. The last component may be '-'. Enumerations and ranges of forbidden values may be mixed in one set of brackets. "string" matches a literal string matching that given inside the double quotes. 
'string' diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index efa35dd..e215111 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -587,17 +587,13 @@ def validate!(ast, expr = @expr) when :range str = expr.last.dup str = str[1..-1] if str.start_with?('^') - if str.include?('-') - # If range is RANGE or O_RANGE, must be of form R_CHAR-R_CHAR or HEX-HEX - raise SyntaxError, "Range must be of form HEX-HEX or R_CHAR-R_CHAR: was #{str.inspect}" unless - str.match?(/^\^?(?:(?:#{Terminals::HEX}-#{Terminals::HEX})|(?:#{Terminals::R_CHAR}-#{Terminals::R_CHAR}))$/) - else - if str.match?(/^#{Terminals::HEX}+$/) - # Okay - elsif str.match?(Terminals::HEX) || !str.match?(/^#{Terminals::R_CHAR}+$/) - # Can't include both CHAR and HEX - raise SyntaxError, "Range must be of form HEX+ or R_CHAR+: was #{str.inspect}" - end + str = str[0..-2] if str.end_with?('-') # Allowed at end of range + scanner = StringScanner.new(str) + while !scanner.eos? + scanner.scan(/#{Terminals::HEX}-#{Terminals::HEX}/) || + scanner.scan(/#{Terminals::R_CHAR}-#{Terminals::R_CHAR}/) || + scanner.scan(/#{Terminals::HEX}|#{Terminals::R_CHAR}/) || + raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}") end else ([:alt, :diff].include?(expr.first) ? 
expr[1..-1] : expr[1,1]).each do |sym| diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index f6198db..8bb6a32 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -971,6 +971,21 @@ let(:gram) {EBNF.parse("a ::= 'b'?")} subject {gram.ast.first} + { + "mixed enum char and hex": [ + "a ::= [b#x20]", + %(In rule a: Range must be of form HEX+ or R_CHAR+: was "b#x20") + ], + "mixed enum char and hex (2)": [ + "a ::= [#x20z]", + %(In rule a: Range must be of form HEX+ or R_CHAR+: was "#x20z") + ], + }.each do |name, (rule, message)| + it name do + expect(EBNF.parse(rule)).to be_valid + end + end + { "missing rule": [ "a ::= b", @@ -984,14 +999,6 @@ "a ::= []", /syntax error/ ], - "mixed enum char and hex": [ - "a ::= [b#x20]", - %(In rule a: Range must be of form HEX+ or R_CHAR+: was "b#x20") - ], - "mixed enum char and hex (2)": [ - "a ::= [#x20z]", - %(In rule a: Range must be of form HEX+ or R_CHAR+: was "#x20z") - ], "mixed range char and hex": [ "a ::= [b-#x20]", /syntax error/ From 2bf07b43c21856015206d9a852bc15006d9038cc Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 11 Jul 2020 12:28:26 -0700 Subject: [PATCH 39/50] Fix writing EBNF strings to detect illegal characters. --- lib/ebnf/writer.rb | 26 +++---- spec/writer_spec.rb | 177 ++++++++++++++++++++++++++------------------ 2 files changed, 114 insertions(+), 89 deletions(-) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 08d3f13..364230b 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -221,11 +221,9 @@ def format_ebnf(expr, sep: nil, embedded: false) if expr.length == 1 return format_ebnf_char(expr) elsif expr =~ /\A#x\h+/ - return format_ebnf_hex(expr[2..-1].hex.chr) - elsif expr =~ /"/ - return "'" + escape_ebnf(expr, "'") + "'" + return escape_ebnf_hex(expr[2..-1].hex.chr) else - return '"' + escape_ebnf(expr, '"') + '"' + return format_ebnf_string(expr, expr.include?('"') ? 
"'" : '"') end end parts = { @@ -328,21 +326,16 @@ def format_ebnf_range(string) end # Escape a string, using as many UTF-8 characters as possible - def escape_ebnf(string, quote = '"') - buffer = "" + def format_ebnf_string(string, quote = '"') string.each_char do |c| - buffer << case (u = c.ord) - when 0x00..0x20 then escape_ebnf_hex(c) - when quote.ord then escape_ebnf_hex(c) - when 0x7F then escape_ebnf_hex(c) - else c + case c.ord + when 0x00..0x19, quote.ord + raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless + ISOEBNF::TERMINAL_CHARACTER.match?(c) end end - if @options[:html] - %(#{buffer}) - else - buffer - end + + "#{quote}#{string}#{quote}" end def escape_ebnf_hex(u) @@ -633,7 +626,6 @@ def format_isoebnf_range(string) else while !scanner.eos? r = scanner.scan(/.-./) - require 'byebug'; byebug unless r ranges << r end ranges.each do |range| diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 8635cd6..ed7ddf0 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -118,78 +118,110 @@ describe "#format_ebnf" do subject {EBNF::Writer.new([])} - { - "alt": [ - [:alt, :A, :B], - "A | B" - ], - "enum": [ - [:range, "abc-"], - "[abc-]" - ], - "hex": [ - [:hex, "#x20"], - "#x20" - ], - "istr": [ - [:istr, "foo"], - %("foo") - ], - "opt": [ - [:opt, :A], - "A?" - ], - "plus": [ - [:plus, :A], - "A+" - ], - "range": [ - [:range, "a-z"], - "[a-z]" - ], - "rept 0 1": [ - [:rept, 0, 1, :A], - "A?" - ], - "rept 0 *": [ - [:rept, 0, '*', :A], - "A*" - ], - "rept 1 1": [ - [:rept, 1, 1, :A], - "A" - ], - "rept 1 *": [ - [:rept, 1, '*', :A], - "A+" - ], - "rept 1 2": [ - [:rept, 1, 2, :A], - "A A?" - ], - "rept 1 3": [ - [:rept, 1, 3, :A], - "A (A A?)?" - ], - "rept 1 3 (A B)": [ - [:rept, 1, 3, [:seq, :A, :B]], - "(A B) ((A B) (A B)?)?" - ], - "rept 1 3 (A | B)": [ - [:rept, 1, 3, [:alt, :A, :B]], - "(A | B) ((A | B) (A | B)?)?" 
- ], - "star": [ - [:star, :A], - "A*" - ], - "n3 path": [ - [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], - %{pathItem (("!" path) | ("^" path))} - ], - }.each do |title, (expr, result)| - it title do - expect(subject.send(:format_ebnf, expr)).to eql result + context "legal expressions" do + { + "alt": [ + [:alt, :A, :B], + "A | B" + ], + "enum": [ + [:range, "abc-"], + "[abc-]" + ], + "hex": [ + [:hex, "#x20"], + "#x20" + ], + "istr": [ + [:istr, "foo"], + %("foo") + ], + "opt": [ + [:opt, :A], + "A?" + ], + "plus": [ + [:plus, :A], + "A+" + ], + "range": [ + [:range, "a-zA-Z"], + "[a-zA-Z]" + ], + "rept 0 1": [ + [:rept, 0, 1, :A], + "A?" + ], + "rept 0 *": [ + [:rept, 0, '*', :A], + "A*" + ], + "rept 1 1": [ + [:rept, 1, 1, :A], + "A" + ], + "rept 1 *": [ + [:rept, 1, '*', :A], + "A+" + ], + "rept 1 2": [ + [:rept, 1, 2, :A], + "A A?" + ], + "rept 1 3": [ + [:rept, 1, 3, :A], + "A (A A?)?" + ], + "rept 1 3 (A B)": [ + [:rept, 1, 3, [:seq, :A, :B]], + "(A B) ((A B) (A B)?)?" + ], + "rept 1 3 (A | B)": [ + [:rept, 1, 3, [:alt, :A, :B]], + "(A | B) ((A | B) (A | B)?)?" + ], + "star": [ + [:star, :A], + "A*" + ], + "string '\\r'": [ + [:seq, "\r"], + %{#x0D} + ], + "string ' '": [ + [:seq, " "], + %{#x20} + ], + "string 'a'": [ + [:seq, "a"], + %{"a"} + ], + "string '\"'": [ + [:seq, '"'], + %{'"'} + ], + "string \"'\"": [ + [:seq, '\''], + %{"'"} + ], + "n3 path": [ + [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], + %{pathItem (("!" 
path) | ("^" path))} + ], + }.each do |title, (expr, result)| + it title do + expect(subject.send(:format_ebnf, expr)).to eql result + end + end + end + + context "illegal expressions" do + { + "string 'a\nb": [:seq, "a\nb"], + }.each do |title, expr| + it title do + expect {subject.send(:format_ebnf, expr)}.to raise_error RangeError + end end end end @@ -200,6 +232,7 @@ "EBNF Grammar" => File.expand_path("../../etc/ebnf.ebnf", __FILE__), "ISO EBNF Grammar" => File.expand_path("../../etc/iso-ebnf.ebnf", __FILE__), "Turtle Grammar" => File.expand_path("../../etc/turtle.ebnf", __FILE__), + "SPARQL Grammar" => File.expand_path("../../etc/sparql.ebnf", __FILE__), }.each do |name, file| context name do it "outputs grammar as text" do From 106189f1ba83471b9f3d56f1a0b9dcf097b3b2be Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 11 Jul 2020 12:51:57 -0700 Subject: [PATCH 40/50] Add back original custom EBNF parser as 'native'. --- bin/ebnf | 4 +- lib/ebnf.rb | 1 + lib/ebnf/base.rb | 26 +++- lib/ebnf/native.rb | 320 ++++++++++++++++++++++++++++++++++++++++++++ spec/native_spec.rb | 137 +++++++++++++++++++ 5 files changed, 485 insertions(+), 3 deletions(-) create mode 100644 lib/ebnf/native.rb create mode 100644 spec/native_spec.rb diff --git a/bin/ebnf b/bin/ebnf index 1300954..d762f1a 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -26,7 +26,7 @@ OPT_ARGS = [ ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"], ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, isoebnf, isoebnfh, ttl, sxp, or rb"], - ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf isoebnf, or sxp"], + ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf isoebnf, native, or sxp"], ["--mod-name", GetoptLong::REQUIRED_ARGUMENT,"Module name used 
when creating ruby tables"], ["--output", "-o", GetoptLong::REQUIRED_ARGUMENT,"Output to the specified file path"], ["--peg", GetoptLong::NO_ARGUMENT, "Transform EBNF to PEG"], @@ -60,7 +60,7 @@ opts.each do |opt, arg| when '--bnf' then options[:bnf] = true when '--evaluate' then input = arg when '--input-format' - unless %w(abnf ebnf isoebnf sxp).include?(arg) + unless %w(abnf ebnf isoebnf native sxp).include?(arg) STDERR.puts("unrecognized input format #{arg}") usage end diff --git a/lib/ebnf.rb b/lib/ebnf.rb index 4c54337..dce92bd 100755 --- a/lib/ebnf.rb +++ b/lib/ebnf.rb @@ -4,6 +4,7 @@ module EBNF autoload :BNF, "ebnf/bnf" autoload :ISOEBNF, "ebnf/isoebnf" autoload :LL1, "ebnf/ll1" + autoload :Native, "ebnf/native" autoload :Parser, "ebnf/parser" autoload :PEG, "ebnf/peg" autoload :Rule, "ebnf/rule" diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 93ba533..2c685c2 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -84,6 +84,7 @@ module EBNF class Base include BNF include LL1 + include Native include PEG # Abstract syntax tree from parse @@ -101,7 +102,8 @@ class Base # # @param [#read, #to_s] input # @param [Symbol] format (:ebnf) - # Format of input, one of :abnf, :ebnf, :isoebnf, :isoebnf, or :sxp + # Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:isoebnf`, `:native`, or `:sxp`. + # Use `:native` for the native EBNF parser, rather than the PEG parser. # @param [Hash{Symbol => Object}] options # @option options [Boolean, Array] :debug # Output debug information to an array or $stdout. 
@@ -125,6 +127,28 @@ def initialize(input, format: :ebnf, **options) when :isoebnf iso = ISOEBNF.new(input, **options) @ast = iso.ast + when :native + scanner = StringScanner.new(input) + + eachRule(scanner) do |r| + debug("rule string") {r.inspect} + case r + when /^@terminals/ + # Switch mode to parsing terminals + terminal = true + when /^@pass\s*(.*)$/m + expr = expression($1).first + rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self) + rule.orig = expr + @ast << rule + else + rule = depth {ruleParts(r)} + + rule.kind = :terminal if terminal # Override after we've parsed @terminals + rule.orig = r + @ast << rule + end + end when :sxp require 'sxp' unless defined?(SXP) @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)} diff --git a/lib/ebnf/native.rb b/lib/ebnf/native.rb new file mode 100644 index 0000000..664d01c --- /dev/null +++ b/lib/ebnf/native.rb @@ -0,0 +1,320 @@ +module EBNF + module Native + ## + # Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself. + # + # Iterate over rule strings. + # a line that starts with '\[' or '@' starts a new rule + # + # @param [StringScanner] scanner + # @yield rule_string + # @yieldparam [String] rule_string + def eachRule(scanner) + cur_lineno = 1 + r = '' + until scanner.eos? + case + when s = scanner.scan(%r(\s+)m) + # Eat whitespace + cur_lineno += s.count("\n") + #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" } + when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m) + # Eat comments /* .. */ + cur_lineno += s.count("\n") + debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } + when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m) + # Eat comments (* .. 
*) + cur_lineno += s.count("\n") + debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } + when s = scanner.scan(%r((#(?!x)|//).*$)) + # Eat comments // & # + cur_lineno += s.count("\n") + debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } + when s = scanner.scan(/\A["']/) + # Found a quote, scan until end of matching quote + s += scanner.scan_until(/#{scanner.matched}|$/) + r += s + when s = scanner.scan(%r(^@terminals)) + #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" } + yield(r) unless r.empty? + @lineno = cur_lineno + yield(s) + r = '' + when s = scanner.scan(/@pass/) + # Found rule start, if we've already collected a rule, yield it + #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" } + yield r unless r.empty? + @lineno = cur_lineno + r = s + when s = scanner.scan(EBNF::Terminals::LHS) + # Found rule start, if we've already collected a rule, yield it + yield r unless r.empty? + #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" } + @lineno = cur_lineno + r = s + else + # Collect until end of line, or start of comment or quote + s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$}) + if scanner.matched.length > 0 + # Back up scan head before ending match + scanner.pos = scanner.pos - scanner.matched.length + + # Remove matched from end of string + s = s[0..-(scanner.matched.length+1)] + end + cur_lineno += s.count("\n") + #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" } + r += s + end + end + yield r unless r.empty? + end + + ## + # Parse a rule into an optional rule number, a symbol and an expression + # + # @param [String] rule + # @return [Rule] + def ruleParts(rule) + num_sym, expr = rule.split('::=', 2).map(&:strip) + num, sym = num_sym.split(']', 2).map(&:strip) + num, sym = "", num if sym.nil? 
+ num = num[1..-1] + r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self) + debug("ruleParts") { r.inspect } + r + end + + ## + # Parse a string into an expression tree and a remaining string + # + # @example + # >>> expression("a b c") + # ((seq a b c) '') + # + # >>> expression("a? b+ c*") + # ((seq (opt a) (plus b) (star c)) '') + # + # >>> expression(" | x xlist") + # ((alt (seq) (seq x xlist)) '') + # + # >>> expression("a | (b - c)") + # ((alt a (diff b c)) '') + # + # >>> expression("a b | c d") + # ((alt (seq a b) (seq c d)) '') + # + # >>> expression("a | b | c") + # ((alt a b c) '') + # + # >>> expression("a) b c") + # (a ' b c') + # + # >>> expression("BaseDecl? PrefixDecl*") + # ((seq (opt BaseDecl) (star PrefixDecl)) '') + # + # >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]") + # ((alt NCCHAR1 diff + # (range '0-9') + # (hex '#x00B7') + # (range '#x0300-#x036F') + # (range, '#x203F-#x2040')) '') + # + # @param [String] s + # @return [Array] + def expression(s) + debug("expression") {"(#{s.inspect})"} + e, s = depth {alt(s)} + debug {"=> alt returned #{[e, s].inspect}"} + unless s.to_s.empty? + t, ss = depth {terminal(s)} + debug {"=> terminal returned #{[t, ss].inspect}"} + return [e, ss] if t.is_a?(Array) && t.first == :")" + end + [e, s] + end + + ## + # Parse alt + # >>> alt("a | b | c") + # ((alt a b c) '') + # @param [String] s + # @return [Array] + def alt(s) + debug("alt") {"(#{s.inspect})"} + args = [] + while !s.to_s.empty? + e, s = depth {seq(s)} + debug {"=> seq returned #{[e, s].inspect}"} + if e.to_s.empty? + break unless args.empty? + e = [:seq, []] # empty sequence + end + args << e + unless s.to_s.empty? + t, ss = depth {terminal(s)} + break unless t[0] == :alt + s = ss + end + end + args.length > 1 ? [args.unshift(:alt), s] : [e, s] + end + + ## + # parse seq + # + # >>> seq("a b c") + # ((seq a b c) '') + # + # >>> seq("a b? 
c") + # ((seq a (opt b) c) '') + def seq(s) + debug("seq") {"(#{s.inspect})"} + args = [] + while !s.to_s.empty? + e, ss = depth {diff(s)} + debug {"=> diff returned #{[e, ss].inspect}"} + unless e.to_s.empty? + args << e + s = ss + else + break; + end + end + if args.length > 1 + [args.unshift(:seq), s] + elsif args.length == 1 + args + [s] + else + ["", s] + end + end + + ## + # parse diff + # + # >>> diff("a - b") + # ((diff a b) '') + def diff(s) + debug("diff") {"(#{s.inspect})"} + e1, s = depth {postfix(s)} + debug {"=> postfix returned #{[e1, s].inspect}"} + unless e1.to_s.empty? + unless s.to_s.empty? + t, ss = depth {terminal(s)} + debug {"diff #{[t, ss].inspect}"} + if t.is_a?(Array) && t.first == :diff + s = ss + e2, s = primary(s) + unless e2.to_s.empty? + return [[:diff, e1, e2], s] + else + error("diff", "Syntax Error") + raise SyntaxError, "diff missing second operand" + end + end + end + end + [e1, s] + end + + ## + # parse postfix + # + # >>> postfix("a b c") + # (a ' b c') + # + # >>> postfix("a? b c") + # ((opt, a) ' b c') + def postfix(s) + debug("postfix") {"(#{s.inspect})"} + e, s = depth {primary(s)} + debug {"=> primary returned #{[e, s].inspect}"} + return ["", s] if e.to_s.empty? + if !s.to_s.empty? 
+ t, ss = depth {terminal(s)} + debug {"=> #{[t, ss].inspect}"} + if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first) + return [[t.first, e], ss] + end + end + [e, s] + end + + ## + # parse primary + # + # >>> primary("a b c") + # (a ' b c') + def primary(s) + debug("primary") {"(#{s.inspect})"} + t, s = depth {terminal(s)} + debug {"=> terminal returned #{[t, s].inspect}"} + if t.is_a?(Symbol) || t.is_a?(String) + [t, s] + elsif %w(range hex).map(&:to_sym).include?(t.first) + [t, s] + elsif t.first == :"(" + e, s = depth {expression(s)} + debug {"=> expression returned #{[e, s].inspect}"} + [e, s] + else + ["", s] + end + end + + ## + # parse one terminal; return the terminal and the remaining string + # + # A terminal is represented as a tuple whose 1st item gives the type; + # some types have additional info in the tuple. + # + # @example + # >>> terminal("'abc' def") + # ('abc' ' def') + # + # >>> terminal("[0-9]") + # ((range '0-9') '') + # >>> terminal("#x00B7") + # ((hex '#x00B7') '') + # >>> terminal ("\[#x0300-#x036F\]") + # ((range '#x0300-#x036F') '') + # >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]") + # ((range "^<>'{}|^`") '-\[#x00-#x20\]') + def terminal(s) + s = s.strip + #STDERR.puts s.inspect + case m = s[0,1] + when '"', "'" # STRING1 or STRING2 + l, s = s[1..-1].split(m.rstrip, 2) + [LL1::Lexer.unescape_string(l), s] + when '[' # RANGE, O_RANGE + l, s = s[1..-1].split(/(?<=[^\\])\]/, 2) + [[:range, LL1::Lexer.unescape_string(l)], s] + when '#' # HEX + s.match(/(#x\h+)(.*)$/) + l, s = $1, $2 + [[:hex, l], s] + when /[\w\.]/ # SYMBOL + s.match(/([\w\.]+)(.*)$/) + l, s = $1, $2 + [l.to_sym, s] + when '-' + [[:diff], s[1..-1]] + when '?' 
+ [[:opt], s[1..-1]] + when '|' + [[:alt], s[1..-1]] + when '+' + [[:plus], s[1..-1]] + when '*' + [[:star], s[1..-1]] + when /[\(\)]/ # '(' or ')' + [[m.to_sym], s[1..-1]] + else + error("terminal", "unrecognized terminal: #{s.inspect}") + raise SyntaxError, "unrecognized terminal: #{s.inspect}" + end + end + end +end \ No newline at end of file diff --git a/spec/native_spec.rb b/spec/native_spec.rb new file mode 100644 index 0000000..8c71fea --- /dev/null +++ b/spec/native_spec.rb @@ -0,0 +1,137 @@ +# coding: utf-8 +$:.unshift "." +require 'spec_helper' +require 'ebnf' +require 'sxp' + +describe EBNF::Native do + let(:logger) {RDF::Spec.logger} + after(:each) do |example| + puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) + end + + context "rule variations" do + { + "legal rule name": [ + 'rulename ::= "foo"', + %{((rule rulename (seq "foo")))} + ], + "prolog": [ + %{[2] Prolog ::= BaseDecl? PrefixDecl*}, + %{((rule Prolog "2" (seq (opt BaseDecl) (star PrefixDecl))))} + ], + "aliteration": [ + %{[2] declaration ::= '@terminals' | '@pass'}, + %{((rule declaration "2" (alt "@terminals" "@pass")))}, + ], + "posfix": [ + %{[9] postfix ::= primary ( [?*+] )?}, + %{((rule postfix "9" (seq primary (opt (range "?*+")))))}, + ], + "diff": [ + %{[18] STRING2 ::= "'" (CHAR - "'")* "'"}, + %{((terminal STRING2 "18" (seq "'" (star (diff CHAR "'")) "'")))}, + ], + "IRIREF": [ + %([18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), + %{((terminal IRIREF "18" + (seq "<" + (star + (alt + (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) + UCHAR)) + ">")))}, + ], + }.each do |title, (input, expect)| + it title do + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + + context "without rule identifiers" do + { + "prolog": [ + %{Prolog ::= BaseDecl? 
PrefixDecl*}, + %{((rule Prolog (seq (opt BaseDecl) (star PrefixDecl))))} + ], + "aliteration": [ + %{declaration ::= '@terminals' | '@pass'}, + %{((rule declaration (alt "@terminals" "@pass")))}, + ], + "posfix": [ + %{postfix ::= primary ( [?*+] )?}, + %{((rule postfix (seq primary (opt (range "?*+")))))}, + ], + "diff": [ + %{STRING2 ::= "'" (CHAR - "'")* "'"}, + %{((terminal STRING2 (seq "'" (star (diff CHAR "'")) "'")))}, + ], + "IRIREF": [ + %(IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'), + %{((terminal IRIREF + (seq "<" + (star + (alt + (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) + UCHAR)) + ">")))}, + ], + }.each do |title, (input, expect)| + it title do + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + end + + describe "#expression" do + { + "'abc' def" => %{(seq "abc" def)}, + %{[0-9]} => %{(range "0-9")}, + %{#x00B7} => %{(hex "#x00B7")}, + %{[#x0300-#x036F]} => %{(range "#x0300-#x036F")}, + %{[^<>'{}|^`]-[#x00-#x20]} => %{(diff (range "^<>'{}|^`") (range "#x00-#x20"))}, + %{a b c} => %{(seq a b c)}, + %{a? b c} => %{(seq (opt a) b c)}, + %{a - b} => %{(diff a b)}, + %{(a - b) - c} => %{(diff (diff a b) c)}, + %{a b? c} => %{(seq a (opt b) c)}, + %{a | b | c} => %{(alt a b c)}, + %{a? b+ c*} => %{(seq (opt a) (plus b) (star c))}, + %{foo | x xlist} => %{(alt foo (seq x xlist))}, + %{a | (b - c)} => %{(alt a (diff b c))}, + %{a b | c d} => %{(alt (seq a b) (seq c d))}, + %{BaseDecl? 
PrefixDecl*} => %{(seq (opt BaseDecl) (star PrefixDecl))}, + %{NCCHAR1 | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]} => + %{(alt NCCHAR1 "-" (range "0-9") (hex "#x00B7") (range "#x0300-#x036F") (range "#x203F-#x2040"))}, + %{'<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'} => + %{(seq "<" (star (alt (diff (range "^<>\\\"{}|^`") (range "#x00-#x20")) UCHAR)) ">")} + }.each do |input, expected| + it "given #{input.inspect} produces #{expected}" do + rule = parse("rule ::= #{input}").ast.first + expect(rule.expr.to_sxp).to produce(expected, @debug) + end + end + end + + context "illegal syntax" do + { + "diff missing second operand": %{rule ::= a -}, + "unrecognized terminal" => %{rule ::= %foo%}, + }.each do |title, input| + it title do + expect {parse(input)}.to raise_error(SyntaxError) + end + end + end + + it "parses EBNF grammar" do + gram = parse(File.open(File.expand_path("../../etc/ebnf.ebnf", __FILE__))) + expect(gram).to be_valid + end + + def parse(input, **options) + @debug = [] + EBNF.parse(input, debug: @debug, format: :native, **options) + end +end From b0a421bc25db4d352f352078c991d8c88363c6df Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 11 Jul 2020 15:38:25 -0700 Subject: [PATCH 41/50] Remove ENUM and O_ENUM from grammar and just use RANGE and O_RANGE, which can now mix ranges and enumerations, similar to normal regular expression grammars. 
--- README.md | 6 +- Rakefile | 12 +- etc/ebnf.ebnf | 30 ++--- etc/ebnf.html | 106 ++++++++------- etc/ebnf.ll1.rb | 76 ----------- etc/ebnf.ll1.sxp | 104 +++++++------- etc/ebnf.peg.rb | 108 +++++++-------- etc/ebnf.peg.sxp | 107 +++++++-------- etc/ebnf.sxp | 26 ++-- etc/iso-ebnf.ebnf | 1 + etc/iso-ebnf.sxp | 2 +- etc/sparql.sxp | 9 +- etc/turtle.sxp | 11 +- examples/ebnf-ll1-parser/doc/parser.html | 158 +++++++++------------- examples/ebnf-ll1-parser/meta.rb | 76 ----------- examples/ebnf-ll1-parser/parser.rb | 24 +--- examples/ebnf-peg-parser/doc/parser.html | 164 +++++++++-------------- examples/ebnf-peg-parser/meta.rb | 107 +++++++-------- examples/ebnf-peg-parser/parser.rb | 24 +--- lib/ebnf/base.rb | 8 +- lib/ebnf/ebnf/meta.rb | 106 +++++++-------- lib/ebnf/ll1.rb | 6 +- lib/ebnf/parser.rb | 26 +--- lib/ebnf/rule.rb | 27 +++- lib/ebnf/terminals.rb | 7 +- spec/base_spec.rb | 4 +- spec/bnf_spec.rb | 4 +- spec/ebnf_spec.rb | 8 +- spec/ll1/data/meta.rb | 92 ++----------- spec/ll1/data/parser.rb | 8 -- spec/peg/data/parser.rb | 8 -- spec/peg_spec.rb | 4 +- spec/rule_spec.rb | 63 ++------- 33 files changed, 561 insertions(+), 961 deletions(-) diff --git a/README.md b/README.md index 7e9e8fd..461c426 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,8 @@ which can also be proceeded by an optional number enclosed in square brackets to [1] symbol ::= expression +(Note, this can introduce an ambiguity if the previous rule ends in a range or enum and the current rule has no identifier. In this case, enclosing `expression` within parentheses, or adding intervening comments can resolve the ambiguity.) + Symbols are written in CAPITAL CASE if they are the start symbol of a regular language (terminals), otherwise with they are treated as non-terminal rules. Literal strings are quoted. 
Within the expression on the right-hand side of a rule, the following expressions are used to match strings of one or more characters: @@ -121,11 +123,11 @@ Within the expression on the right-hand side of a rule, the following expression [a-zA-Z], [#xN-#xN] matches any Char or HEX with a value in the range(s) indicated (inclusive). [abc], [#xN#xN#xN] - matches any UTF-8 Char or HEX with a value among the characters enumerated. The last component may be '-'. Enumerations and ranges may be mixed in one set of brackets. + matches any UTF-8 R\_CHAR or HEX with a value among the characters enumerated. The last component may be '-'. Enumerations and ranges may be mixed in one set of brackets. [^a-z], [^#xN-#xN] matches any UTF-8 Char or HEX a value outside the range indicated. [^abc], [^#xN#xN#xN] - matches any UTF-8 Char or HEX with a value not among the characters given. The last component may be '-'. Enumerations and ranges of forbidden values may be mixed in one set of brackets. + matches any UTF-8 R\_CHAR or HEX with a value not among the characters given. The last component may be '-'. Enumerations and ranges of excluded values may be mixed in one set of brackets. "string" matches a literal string matching that given inside the double quotes. 
'string' diff --git a/Rakefile b/Rakefile index 5ee4416..e06ed3a 100755 --- a/Rakefile +++ b/Rakefile @@ -80,7 +80,7 @@ end rule ".sxp" => %w{.ebnf} do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -89,7 +89,7 @@ end rule ".peg.sxp" => %w{.ebnf} do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --peg #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native --peg #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -97,13 +97,13 @@ end rule ".html" => %w{.ebnf} do |t| puts "build #{t.name}" - %x(bin/ebnf --format html -o #{t.name} #{t.source}) + %x(bin/ebnf --input-format native --format html -o #{t.name} #{t.source}) end file "etc/ebnf.ll1.sxp" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --ll1 ebnf #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --input-format native --ll1 ebnf #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -111,10 +111,10 @@ end file "etc/ebnf.peg.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) + %x(bin/ebnf --input-format native --peg --mod-name EBNFMeta --input-format native -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) end file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) + %x(bin/ebnf --input-format native --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) end diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index fefafa8..b2e9a34 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -3,10 +3,12 @@ [2] declaration ::= '@terminals' | pass - [3] rule ::= LHS expression - # Use the LHS terminal to match the identifier, rule name and assignment due to - # confusion between the 
identifier and RANGE + # confusion between the identifier and RANGE. + # Note, for grammars not using identifiers, it is still possible to confuse + # a rule ending with a range the next rule, as it may be interpreted as an identifier. + # In such case, best to enclose the rule in '()'. + [3] rule ::= LHS expression [4] expression ::= alt @@ -20,8 +22,6 @@ [9] primary ::= HEX | SYMBOL - | ENUM - | O_ENUM | RANGE | O_RANGE | STRING1 @@ -38,27 +38,23 @@ [13] HEX ::= '#x' ([a-f] | [A-F] | [0-9])+ - [14] ENUM ::= ('[' (R_CHAR+ | HEX+) '-'? ']') - LHS # exclusively R_CHAR or HEX - - [15] O_ENUM ::= '[^' (R_CHAR+ | HEX+) '-'? ']' # both ENUM and O_ENUM can end with '-' - - [16] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' + [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' - LHS - [17] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX))+ ']' + [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' # Strings are unescaped Unicode, excepting control characters and hash (#) - [18] STRING1 ::= '"' (CHAR - '"')* '"' + [16] STRING1 ::= '"' (CHAR - '"')* '"' - [19] STRING2 ::= "'" (CHAR - "'")* "'" + [17] STRING2 ::= "'" (CHAR - "'")* "'" - [20] CHAR ::= [#x9#xA#xD] | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + [18] CHAR ::= [#x9#xA#xD] | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - [21] R_CHAR ::= CHAR - (']' | '-') + [19] R_CHAR ::= CHAR - (']' | '-' | HEX) - [22] POSTFIX ::= [?*+] + [20] POSTFIX ::= [?*+] # Ignore all whitespace and comments between non-terminals - [23] PASS ::= [#x9#xA#xD#x20] + [21] PASS ::= [#x9#xA#xD#x20] | ( ('#' - '#x') | '//' ) [^#xA#xD]* | '/*' (( '*' [^/] )? | [^*] )* '*/' | '(*' (( '*' [^)] )? 
| [^*] )* '*)' diff --git a/etc/ebnf.html b/etc/ebnf.html index 4dd8b1e..d1d9104 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -11,7 +11,7 @@ [2] declaration ::= - "@terminals" | pass + "@terminals" | pass [3] @@ -61,18 +61,6 @@ | SYMBOL - - [9] - - | - ENUM - - - [9] - - | - O_ENUM - [9] @@ -107,7 +95,7 @@ [10] pass ::= - "@pass" expression + "@pass" expression @terminals @@ -119,7 +107,7 @@ [11] LHS ::= - ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" + ( "[" SYMBOL "]" #x20+ ) ? SYMBOL #x20* "::=" [12] @@ -131,103 +119,127 @@ [13] HEX ::= - "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + + "#x" ( [ a-f] | [ A-F] | [ 0-9] ) + - + [14] - ENUM + RANGE ::= - ( "[" ( R_CHAR+ | HEX+ ) "-"? "]") - LHS + "[" - - [15] - O_ENUM - ::= - "[^" ( R_CHAR+ | HEX+ ) "-"? "]" + + [14] + + + ( ( R_CHAR "-" R_CHAR)( HEX "-" HEX) | R_CHAR | HEX) + - - [16] - RANGE - ::= - "[" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" + + [14] + + + "-"? + + + [14] + + + ( "]" - LHS) - [17] + [15] O_RANGE ::= - "[^" ( ( R_CHAR "-" R_CHAR) | ( HEX "-" HEX) ) + "]" + "[^" + + + [15] + + + ( ( R_CHAR "-" R_CHAR)( HEX "-" HEX) | R_CHAR | HEX) + + + + [15] + + + "-"? + + + [15] + + + "]" - [18] + [16] STRING1 ::= '"' ( CHAR - '"') * '"' - [19] + [17] STRING2 ::= "'" ( CHAR - "'") * "'" - [20] + [18] CHAR ::= [ #x09#x0A#x0D] - [20] + [18] | [ #x20-#xD7FF] - [20] + [18] | [ #xE000-#xFFFD] - [20] + [18] | [ #x00010000-#x0010FFFF] - [21] + [19] R_CHAR ::= - CHAR - ( "]" | "-") + CHAR - ( "]" | "-" | HEX) - [22] + [20] POSTFIX ::= [ ?*+] - [23] + [21] PASS ::= [ #x09#x0A#x0D#x20] - [23] + [21] | - ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) + ( ( ( "#" - "#x") | "//") [ ^#x0A#x0D] * ) - [23] + [21] | - ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") + ( "/*" ( ( "*" [ ^/] ) ? | [ ^*] ) * "*/") - [23] + [21] | - ( "(*" ( ( "*" [ ^)] ) ? | [ ^*] ) * "*)") + ( "(*" ( ( "*" [ ^)] ) ? 
| [ ^*] ) * "*)") @pass diff --git a/etc/ebnf.ll1.rb b/etc/ebnf.ll1.rb index dd497d5..f4000fc 100644 --- a/etc/ebnf.ll1.rb +++ b/etc/ebnf.ll1.rb @@ -5,9 +5,7 @@ module Meta BRANCH = { :alt => { "(" => [:seq, :_alt_1], - :ENUM => [:seq, :_alt_1], :HEX => [:seq, :_alt_1], - :O_ENUM => [:seq, :_alt_1], :O_RANGE => [:seq, :_alt_1], :RANGE => [:seq, :_alt_1], :STRING1 => [:seq, :_alt_1], @@ -33,9 +31,7 @@ module Meta }, :diff => { "(" => [:postfix, :_diff_1], - :ENUM => [:postfix, :_diff_1], :HEX => [:postfix, :_diff_1], - :O_ENUM => [:postfix, :_diff_1], :O_RANGE => [:postfix, :_diff_1], :RANGE => [:postfix, :_diff_1], :STRING1 => [:postfix, :_diff_1], @@ -48,10 +44,8 @@ module Meta "-" => [:_diff_2], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :RANGE => [], :STRING1 => [], @@ -79,9 +73,7 @@ module Meta }, :expression => { "(" => [:alt], - :ENUM => [:alt], :HEX => [:alt], - :O_ENUM => [:alt], :O_RANGE => [:alt], :RANGE => [:alt], :STRING1 => [:alt], @@ -93,9 +85,7 @@ module Meta }, :postfix => { "(" => [:primary, :_postfix_1], - :ENUM => [:primary, :_postfix_1], :HEX => [:primary, :_postfix_1], - :O_ENUM => [:primary, :_postfix_1], :O_RANGE => [:primary, :_postfix_1], :RANGE => [:primary, :_postfix_1], :STRING1 => [:primary, :_postfix_1], @@ -108,10 +98,8 @@ module Meta "-" => [], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :POSTFIX => [:POSTFIX], :RANGE => [], @@ -122,9 +110,7 @@ module Meta }, :primary => { "(" => [:_primary_1], - :ENUM => [:ENUM], :HEX => [:HEX], - :O_ENUM => [:O_ENUM], :O_RANGE => [:O_RANGE], :RANGE => [:RANGE], :STRING1 => [:STRING1], @@ -139,9 +125,7 @@ module Meta }, :seq => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => [:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -153,10 +137,8 @@ module Meta ")" => [], 
"@pass" => [], "@terminals" => [], - :ENUM => [:_seq_2], :HEX => [:_seq_2], :LHS => [], - :O_ENUM => [:_seq_2], :O_RANGE => [:_seq_2], :RANGE => [:_seq_2], :STRING1 => [:_seq_2], @@ -166,9 +148,7 @@ module Meta }, :_seq_2 => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => [:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -182,10 +162,8 @@ module Meta "-", "@pass", "@terminals", - :ENUM, :HEX, :LHS, - :O_ENUM, :O_RANGE, :POSTFIX, :RANGE, @@ -198,8 +176,6 @@ module Meta :alt => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -221,8 +197,6 @@ module Meta :_alt_6 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -234,8 +208,6 @@ module Meta :diff => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -252,8 +224,6 @@ module Meta :_diff_4 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -282,8 +252,6 @@ module Meta :expression => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -294,8 +262,6 @@ module Meta :_pass_1 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -304,8 +270,6 @@ module Meta :postfix => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -320,8 +284,6 @@ module Meta :primary => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -332,8 +294,6 @@ module Meta :_primary_2 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -346,8 +306,6 @@ module Meta :_rule_1 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -356,8 +314,6 @@ module Meta :seq => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -367,8 +323,6 @@ module Meta :_eps, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -377,8 +331,6 @@ module Meta :_seq_2 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -388,8 +340,6 @@ module Meta :_eps, :HEX, 
:SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -399,8 +349,6 @@ module Meta :_eps, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -463,8 +411,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -479,8 +425,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -495,8 +439,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -511,8 +453,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -527,8 +467,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -571,8 +509,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -588,8 +524,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -605,8 +539,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -623,8 +555,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -641,8 +571,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -659,8 +587,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -677,8 +603,6 @@ module Meta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index 2f629e4..fee71b2 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -21,11 +21,11 @@ (alt "@terminals" pass)) (rule rule "3" (first LHS) (follow "@pass" "@terminals" LHS _eof) (seq LHS expression)) (rule expression "4" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof) (seq alt)) (rule alt "5" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" 
"@terminals" LHS _eof) (seq seq _alt_1)) (rule _alt_1 "5.1" @@ -43,56 +43,56 @@ (follow ")" "@pass" "@terminals" LHS _eof "|") (seq "|" seq)) (rule seq "6" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") (cleanup plus) (seq diff _seq_1)) (rule _seq_1 "6.1" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) (follow ")" "@pass" "@terminals" LHS _eof "|") (cleanup star) (alt _empty _seq_2)) (rule _seq_2 "6.2" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") (cleanup merge) (seq diff _seq_1)) (rule diff "7" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 STRING2 + SYMBOL _eof "|" ) (seq postfix _diff_1)) (rule _diff_1 "7.1" (first "-" _eps) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 STRING2 + SYMBOL _eof "|" ) (cleanup opt) (alt _empty _diff_2)) (rule _diff_2 "7.2" (first "-") - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 STRING2 + SYMBOL _eof "|" ) (seq "-" postfix)) (rule postfix "8" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "-" 
"@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 + STRING2 SYMBOL _eof "|" ) (seq primary _postfix_1)) (rule _postfix_1 "8.1" (first POSTFIX _eps) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 + STRING2 SYMBOL _eof "|" ) (cleanup opt) (alt _empty POSTFIX)) (rule primary "9" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) - (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 _primary_1)) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE + STRING1 STRING2 SYMBOL _eof "|" ) + (alt HEX SYMBOL RANGE O_RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (first "(") - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE + STRING1 STRING2 SYMBOL _eof "|" ) (seq "(" expression ")")) (rule pass "10" (first "@pass") @@ -102,22 +102,24 @@ (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) - (terminal ENUM "14" (diff (seq "[" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]") LHS)) - (terminal O_ENUM "15" (seq "[^" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]")) - (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) - (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) - 
(terminal CHAR "20" + (terminal RANGE "14" + (seq "[" + (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX) R_CHAR HEX)) + (opt "-") + (diff "]" LHS)) ) + (terminal O_RANGE "15" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX) R_CHAR HEX)) (opt "-") "]")) + (terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\"")) + (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'")) + (terminal CHAR "18" (alt (range "#x9#xA#xD") (range "#x20-#xD7FF") (range "#xE000-#xFFFD") (range "#x10000-#x10FFFF")) ) - (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) - (terminal POSTFIX "22" (range "?*+")) - (terminal PASS "23" + (terminal R_CHAR "19" (diff CHAR (alt "]" "-" HEX))) + (terminal POSTFIX "20" (range "?*+")) + (terminal PASS "21" (alt (range "#x9#xA#xD#x20") (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) @@ -126,7 +128,7 @@ (pass _pass (seq PASS)) (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf)) (rule _rule_1 "3.1" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "@pass" "@terminals" LHS _eof) (seq expression)) (rule _alt_4 "5.4" @@ -138,43 +140,43 @@ (follow ")" "@pass" "@terminals" LHS _eof) (seq _alt_1)) (rule _alt_6 "5.6" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow ")" "@pass" "@terminals" LHS _eof "|") (seq seq)) (rule _seq_3 "6.3" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) (follow ")" "@pass" "@terminals" LHS _eof "|") (seq _seq_1)) (rule _seq_4 "6.4" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL _eps) (follow ")" "@pass" "@terminals" LHS _eof "|") (seq _seq_1)) (rule _diff_3 "7.3" (first "-" _eps) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE 
- STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 STRING2 + SYMBOL _eof "|" ) (seq _diff_1)) (rule _diff_4 "7.4" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 STRING2 + SYMBOL _eof "|" ) (seq postfix)) (rule _postfix_2 "8.2" (first POSTFIX _eps) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE RANGE - STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE RANGE STRING1 + STRING2 SYMBOL _eof "|" ) (seq _postfix_1)) (rule _primary_2 "9.2" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE + STRING1 STRING2 SYMBOL _eof "|" ) (seq expression ")")) (rule _pass_1 "10.1" - (first "(" ENUM HEX O_ENUM O_RANGE RANGE STRING1 STRING2 SYMBOL) + (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "@pass" "@terminals" LHS _eof) (seq expression)) (rule _primary_3 "9.3" (first ")") - (follow "(" ")" "-" "@pass" "@terminals" ENUM HEX LHS O_ENUM O_RANGE POSTFIX - RANGE STRING1 STRING2 SYMBOL _eof "|" ) + (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE + STRING1 STRING2 SYMBOL _eof "|" ) (seq ")")) ) diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index fc03759..af660ef 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -1,6 +1,6 @@ # This file is automatically generated by ebnf version 2.0.0 # Derived from etc/ebnf.ebnf -module Meta +module EBNFMeta RULES = [ EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule), 
EBNF::Rule.new(:_ebnf_1, "1.1", [:alt, :declaration, :rule]).extend(EBNF::PEG::Rule), @@ -16,7 +16,7 @@ module Meta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), @@ -36,62 +36,54 @@ module Meta EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_1, "14.1", [:seq, "[", :_ENUM_2, :_ENUM_3, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_2, "14.2", [:alt, :_ENUM_4, :_ENUM_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_3, "14.3", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_ENUM, "15", [:seq, "[^", :_O_ENUM_1, :_O_ENUM_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_1, "15.1", [:alt, :_O_ENUM_3, :_O_ENUM_4], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING2, "19", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_1, "19.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_2, "19.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), 
- EBNF::Rule.new(:CHAR, "20", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_1, "20.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_R_CHAR_1, "21.1", [:alt, "]", "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:PASS, "23", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_1, "23.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_5, "23.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_7, "23.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_6, "23.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_8, "23.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_3, "23.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_10, "23.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_11, "23.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_13, "23.13", [:seq, 
"*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_14, "23.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_12, "23.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_15, "23.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_16, "23.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_17, "23.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_19, "23.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_20, "23.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_18, "23.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, 
:HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: 
:terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: 
:pass).extend(EBNF::PEG::Rule), ] end diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index c79f0d2..35ae320 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -13,8 +13,7 @@ (rule _diff_2 "7.2" (seq "-" postfix)) (rule postfix "8" (seq primary _postfix_1)) (rule _postfix_1 "8.1" (opt POSTFIX)) - (rule primary "9" - (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 _primary_1)) + (rule primary "9" (alt HEX SYMBOL RANGE O_RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (seq "(" expression ")")) (rule pass "10" (seq "@pass" expression)) (terminals _terminals (seq)) @@ -34,60 +33,52 @@ (terminal _HEX_3 "13.3" (range "a-f")) (terminal _HEX_4 "13.4" (range "A-F")) (terminal _HEX_5 "13.5" (range "0-9")) - (terminal ENUM "14" (diff _ENUM_1 LHS)) - (terminal _ENUM_1 "14.1" (seq "[" _ENUM_2 _ENUM_3 "]")) - (terminal _ENUM_2 "14.2" (alt _ENUM_4 _ENUM_5)) - (terminal _ENUM_4 "14.4" (plus R_CHAR)) - (terminal _ENUM_5 "14.5" (plus HEX)) - (terminal _ENUM_3 "14.3" (opt "-")) - (terminal O_ENUM "15" (seq "[^" _O_ENUM_1 _O_ENUM_2 "]")) - (terminal _O_ENUM_1 "15.1" (alt _O_ENUM_3 _O_ENUM_4)) - (terminal _O_ENUM_3 "15.3" (plus R_CHAR)) - (terminal _O_ENUM_4 "15.4" (plus HEX)) - (terminal _O_ENUM_2 "15.2" (opt "-")) - (terminal RANGE "16" (seq "[" _RANGE_1 "]")) - (terminal _RANGE_1 "16.1" (plus _RANGE_2)) - (terminal _RANGE_2 "16.2" (alt _RANGE_3 _RANGE_4)) - (terminal _RANGE_3 "16.3" (seq R_CHAR "-" R_CHAR)) - (terminal _RANGE_4 "16.4" (seq HEX "-" HEX)) - (terminal O_RANGE "17" (seq "[^" _O_RANGE_1 "]")) - (terminal _O_RANGE_1 "17.1" (plus _O_RANGE_2)) - (terminal _O_RANGE_2 "17.2" (alt _O_RANGE_3 _O_RANGE_4)) - (terminal _O_RANGE_3 "17.3" (seq R_CHAR "-" R_CHAR)) - (terminal _O_RANGE_4 "17.4" (seq HEX "-" HEX)) - (terminal STRING1 "18" (seq "\"" _STRING1_1 "\"")) - (terminal _STRING1_1 "18.1" (star _STRING1_2)) - (terminal _STRING1_2 "18.2" (diff CHAR "\"")) - (terminal STRING2 "19" (seq "'" _STRING2_1 "'")) - (terminal _STRING2_1 "19.1" (star _STRING2_2)) 
- (terminal _STRING2_2 "19.2" (diff CHAR "'")) - (terminal CHAR "20" (alt _CHAR_1 _CHAR_2 _CHAR_3 _CHAR_4)) - (terminal _CHAR_1 "20.1" (range "#x9#xA#xD")) - (terminal _CHAR_2 "20.2" (range "#x20-#xD7FF")) - (terminal _CHAR_3 "20.3" (range "#xE000-#xFFFD")) - (terminal _CHAR_4 "20.4" (range "#x10000-#x10FFFF")) - (terminal R_CHAR "21" (diff CHAR _R_CHAR_1)) - (terminal _R_CHAR_1 "21.1" (alt "]" "-")) - (terminal POSTFIX "22" (range "?*+")) - (terminal PASS "23" (alt _PASS_1 _PASS_2 _PASS_3 _PASS_4)) - (terminal _PASS_1 "23.1" (range "#x9#xA#xD#x20")) - (terminal _PASS_2 "23.2" (seq _PASS_5 _PASS_6)) - (terminal _PASS_5 "23.5" (alt _PASS_7 "//")) - (terminal _PASS_7 "23.7" (diff "#" "#x")) - (terminal _PASS_6 "23.6" (star _PASS_8)) - (terminal _PASS_8 "23.8" (range "^#xA#xD")) - (terminal _PASS_3 "23.3" (seq "/*" _PASS_9 "*/")) - (terminal _PASS_9 "23.9" (star _PASS_10)) - (terminal _PASS_10 "23.10" (alt _PASS_11 _PASS_12)) - (terminal _PASS_11 "23.11" (opt _PASS_13)) - (terminal _PASS_13 "23.13" (seq "*" _PASS_14)) - (terminal _PASS_14 "23.14" (range "^/")) - (terminal _PASS_12 "23.12" (range "^*")) - (terminal _PASS_4 "23.4" (seq "(*" _PASS_15 "*)")) - (terminal _PASS_15 "23.15" (star _PASS_16)) - (terminal _PASS_16 "23.16" (alt _PASS_17 _PASS_18)) - (terminal _PASS_17 "23.17" (opt _PASS_19)) - (terminal _PASS_19 "23.19" (seq "*" _PASS_20)) - (terminal _PASS_20 "23.20" (range "^)")) - (terminal _PASS_18 "23.18" (range "^*")) + (terminal RANGE "14" (seq "[" _RANGE_1 _RANGE_2 _RANGE_3)) + (terminal _RANGE_1 "14.1" (plus _RANGE_4)) + (terminal _RANGE_4 "14.4" (alt _RANGE_5 _RANGE_6 R_CHAR HEX)) + (terminal _RANGE_5 "14.5" (seq R_CHAR "-" R_CHAR)) + (terminal _RANGE_6 "14.6" (seq HEX "-" HEX)) + (terminal _RANGE_2 "14.2" (opt "-")) + (terminal _RANGE_3 "14.3" (diff "]" LHS)) + (terminal O_RANGE "15" (seq "[^" _O_RANGE_1 _O_RANGE_2 "]")) + (terminal _O_RANGE_1 "15.1" (plus _O_RANGE_3)) + (terminal _O_RANGE_3 "15.3" (alt _O_RANGE_4 _O_RANGE_5 R_CHAR HEX)) + (terminal 
_O_RANGE_4 "15.4" (seq R_CHAR "-" R_CHAR)) + (terminal _O_RANGE_5 "15.5" (seq HEX "-" HEX)) + (terminal _O_RANGE_2 "15.2" (opt "-")) + (terminal STRING1 "16" (seq "\"" _STRING1_1 "\"")) + (terminal _STRING1_1 "16.1" (star _STRING1_2)) + (terminal _STRING1_2 "16.2" (diff CHAR "\"")) + (terminal STRING2 "17" (seq "'" _STRING2_1 "'")) + (terminal _STRING2_1 "17.1" (star _STRING2_2)) + (terminal _STRING2_2 "17.2" (diff CHAR "'")) + (terminal CHAR "18" (alt _CHAR_1 _CHAR_2 _CHAR_3 _CHAR_4)) + (terminal _CHAR_1 "18.1" (range "#x9#xA#xD")) + (terminal _CHAR_2 "18.2" (range "#x20-#xD7FF")) + (terminal _CHAR_3 "18.3" (range "#xE000-#xFFFD")) + (terminal _CHAR_4 "18.4" (range "#x10000-#x10FFFF")) + (terminal R_CHAR "19" (diff CHAR _R_CHAR_1)) + (terminal _R_CHAR_1 "19.1" (alt "]" "-" HEX)) + (terminal POSTFIX "20" (range "?*+")) + (terminal PASS "21" (alt _PASS_1 _PASS_2 _PASS_3 _PASS_4)) + (terminal _PASS_1 "21.1" (range "#x9#xA#xD#x20")) + (terminal _PASS_2 "21.2" (seq _PASS_5 _PASS_6)) + (terminal _PASS_5 "21.5" (alt _PASS_7 "//")) + (terminal _PASS_7 "21.7" (diff "#" "#x")) + (terminal _PASS_6 "21.6" (star _PASS_8)) + (terminal _PASS_8 "21.8" (range "^#xA#xD")) + (terminal _PASS_3 "21.3" (seq "/*" _PASS_9 "*/")) + (terminal _PASS_9 "21.9" (star _PASS_10)) + (terminal _PASS_10 "21.10" (alt _PASS_11 _PASS_12)) + (terminal _PASS_11 "21.11" (opt _PASS_13)) + (terminal _PASS_13 "21.13" (seq "*" _PASS_14)) + (terminal _PASS_14 "21.14" (range "^/")) + (terminal _PASS_12 "21.12" (range "^*")) + (terminal _PASS_4 "21.4" (seq "(*" _PASS_15 "*)")) + (terminal _PASS_15 "21.15" (star _PASS_16)) + (terminal _PASS_16 "21.16" (alt _PASS_17 _PASS_18)) + (terminal _PASS_17 "21.17" (opt _PASS_19)) + (terminal _PASS_19 "21.19" (seq "*" _PASS_20)) + (terminal _PASS_20 "21.20" (range "^)")) + (terminal _PASS_18 "21.18" (range "^*")) (pass _pass (seq PASS))) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index e545b50..45e03ea 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -8,28 +8,30 @@ (rule diff 
"7" (seq postfix (opt (seq "-" postfix)))) (rule postfix "8" (seq primary (opt POSTFIX))) (rule primary "9" - (alt HEX SYMBOL ENUM O_ENUM RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) + (alt HEX SYMBOL RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) (terminals _terminals (seq)) (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) (terminal SYMBOL "12" (plus (alt (range "a-z") (range "A-Z") (range "0-9") "_" "."))) (terminal HEX "13" (seq "#x" (plus (alt (range "a-f") (range "A-F") (range "0-9"))))) - (terminal ENUM "14" (diff (seq "[" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]") LHS)) - (terminal O_ENUM "15" (seq "[^" (alt (plus R_CHAR) (plus HEX)) (opt "-") "]")) - (terminal RANGE "16" (seq "[" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal O_RANGE "17" - (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX))) "]")) - (terminal STRING1 "18" (seq "\"" (star (diff CHAR "\"")) "\"")) - (terminal STRING2 "19" (seq "'" (star (diff CHAR "'")) "'")) - (terminal CHAR "20" + (terminal RANGE "14" + (seq "[" + (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX) R_CHAR HEX)) + (opt "-") + (diff "]" LHS)) ) + (terminal O_RANGE "15" + (seq "[^" (plus (alt (seq R_CHAR "-" R_CHAR) (seq HEX "-" HEX) R_CHAR HEX)) (opt "-") "]")) + (terminal STRING1 "16" (seq "\"" (star (diff CHAR "\"")) "\"")) + (terminal STRING2 "17" (seq "'" (star (diff CHAR "'")) "'")) + (terminal CHAR "18" (alt (range "#x9#xA#xD") (range "#x20-#xD7FF") (range "#xE000-#xFFFD") (range "#x10000-#x10FFFF")) ) - (terminal R_CHAR "21" (diff CHAR (alt "]" "-"))) - (terminal POSTFIX "22" (range "?*+")) - (terminal PASS "23" + (terminal R_CHAR "19" (diff CHAR (alt "]" "-" HEX))) + (terminal POSTFIX "20" (range "?*+")) + (terminal PASS "21" (alt (range "#x9#xA#xD#x20") (seq (alt (diff "#" "#x") "//") (star (range "^#xA#xD"))) diff --git a/etc/iso-ebnf.ebnf b/etc/iso-ebnf.ebnf index 
05d6481..339e936 100644 --- a/etc/iso-ebnf.ebnf +++ b/etc/iso-ebnf.ebnf @@ -70,6 +70,7 @@ comment ::= start_comment_symbol comment_symbol* end_comment comment_symbol ::= comment | terminal_string | special_sequence | character letter ::= [a-zA-Z] +# gratuitous comment decimal_digit ::= [0-9] # Extended to allow '_' diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 9aff3d5..38f5024 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -42,7 +42,7 @@ (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) (pass _pass (alt (plus gap_separator) comment)) - (terminal empty (seq "")) + (terminal empty (seq ())) (terminal defining_symbol (alt "=" ":")) (terminal definition_separator_symbol (alt "|" "/" "!")) (terminal terminator_symbol (alt ";" ".")) diff --git a/etc/sparql.sxp b/etc/sparql.sxp index 9fffee2..6621476 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -283,8 +283,7 @@ (rule PrefixedName "137" (alt PNAME_LN PNAME_NS)) (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON)) (terminals _terminals (seq)) - (terminal IRIREF "139" - (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">")) + (terminal IRIREF "139" (seq "<" (star (range "^<>\"{}|^`]-[#x00-#x20")) ">")) (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "142" @@ -312,10 +311,10 @@ (terminal STRING_LITERAL2 "157" (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\"")) (terminal STRING_LITERAL_LONG1 "158" - (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''")) + (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR ))* \"'''\"")))) (terminal STRING_LITERAL_LONG2 "159" - (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\"")) - (terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'"))) + (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR ))* '\"\"\"'")))) + 
(terminal ECHAR "160" (seq "\\" (range "tbnrf\"'"))) (terminal NIL "161" (seq "(" (star WS) ")")) (terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))) (terminal ANON "163" (seq "[" (star WS) "]")) diff --git a/etc/turtle.sxp b/etc/turtle.sxp index 720c758..aea60b5 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -30,8 +30,7 @@ (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS)) (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON)) (terminals _terminals (seq)) - (terminal IRIREF "18" - (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">")) + (terminal IRIREF "18" (seq "<" (star (alt (range "^<>\"{}|^`]-[#x00-#x20") UCHAR)) ">")) (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "141s" @@ -54,12 +53,12 @@ (terminal STRING_LITERAL_SINGLE_QUOTE "23" (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'")) (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24" - (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''")) + (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\"")))) (terminal STRING_LITERAL_LONG_QUOTE "25" - (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\"")) + (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'")))) (terminal UCHAR "26" - (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX))) - (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'"))) + (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX))) + (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'"))) (terminal SPARQL_PREFIX "28t" (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx"))) (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee"))) diff --git a/examples/ebnf-ll1-parser/doc/parser.html b/examples/ebnf-ll1-parser/doc/parser.html 
index 526fdeb..749c33e 100644 --- a/examples/ebnf-ll1-parser/doc/parser.html +++ b/examples/ebnf-ll1-parser/doc/parser.html @@ -590,7 +590,7 @@

Terminals

  terminal(:HEX, HEX) do |prod, token, input|
-    input[:terminal] = token.value
+    input[:terminal] = [:hex, token.value]
   end
@@ -599,41 +599,9 @@

Terminals

-

Terminal for ENUM is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

- -
[14] ENUM       ::= ('[' R_CHAR+ | HEX+ ']') - LHS
-
- - -
  terminal(:ENUM, ENUM, unescape: true) do |prod, token, input|
-    input[:terminal] = [:range, token.value[1..-2]]
-  end
- - - - -
- -
-

Terminal for O_ENUM is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

- -
[15] O_ENUM     ::= '[^' R_CHAR+ | HEX+ ']'
-
- - -
  terminal(:O_ENUM, O_ENUM, unescape: true) do |prod, token, input|
-    input[:terminal] = [:range, token.value[1..-2]]
-  end
- - - - -
- -

Terminal for RANGE is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

-
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
+
[14] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -642,14 +610,14 @@

Terminals

end
- +
- +

Terminal for O_RANGE is matched as part of a primary rule. Unescape the values to remove EBNF escapes in the input.

-
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
+
[15] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -658,10 +626,10 @@

Terminals

end
- +
- +

Strings have internal escape sequences expanded and are passed through without surrounding quotes as terminals

@@ -669,14 +637,14 @@

Terminals

- +
- +

Match double quote string

-
[18] STRING1    ::= '"' (CHAR - '"')* '"'
+
[16] STRING1    ::= '"' (CHAR - '"')* '"'
 
@@ -685,14 +653,14 @@

Terminals

end
- +
- +

Match single quote string

-
[19] STRING2    ::= "'" (CHAR - "'")* "'"
+
[17] STRING2    ::= "'" (CHAR - "'")* "'"
 
@@ -701,10 +669,10 @@

Terminals

end
- +
- +

The CHAR and R_CHAR productions are not used explicitly

@@ -712,14 +680,14 @@

Terminals

- +
- +

Match POSTFIX terminal

-
[22] POSTFIX    ::= [?*+]
+
[20] POSTFIX    ::= [?*+]
 
@@ -728,10 +696,10 @@

Terminals

end
- +
- +

The PASS production is not used explicitly

@@ -739,10 +707,10 @@

Terminals

- +
- +

Make sure we recognize string terminals, even though they're not actually used in processing. This defines a "catch-all" terminal for the lexer.

@@ -771,10 +739,10 @@

Non-terminal productions

- +
- +

Production for end of declaration non-terminal.

@@ -789,27 +757,27 @@

Non-terminal productions

  production(:declaration) do |input, data, callback|
- +
- +

data contains a declaration. Invoke callback

    if data[:terminal]
-      callback.call(:terminal, data[:terminal])
+      callback.call(:terminals, data[:terminal])
     elsif data[:pass]
       callback.call(:pass, data[:pass])
     end
   end
- +
- +

Production for end of rule non-terminal. The input parameter includes information placed by previous productions at the same level, or at the start of the current production. @@ -825,10 +793,10 @@

Non-terminal productions

  production(:rule) do |input, data, callback|
- +
- +

data contains an expression. Invoke callback

@@ -839,10 +807,10 @@

Non-terminal productions

end - +
- +

Production for end of expression non-terminal. Passes through the optimized value of the alt production as follows:

@@ -859,10 +827,10 @@

Non-terminal productions

end - +
- +

Production for end of alt non-terminal. Passes through the optimized value of the seq production as follows:

@@ -886,10 +854,10 @@

Non-terminal productions

end - +
- +

Production for end of seq non-terminal. Passes through the optimized value of the diff production as follows:

@@ -914,10 +882,10 @@

Non-terminal productions

end - +
- +

Diff production returns concatenated postfix values

@@ -936,10 +904,10 @@

Non-terminal productions

end - +
- +

Production for end of postfix non-terminal. Either returns the primary production value, or as modified by the postfix.

@@ -956,10 +924,10 @@

Non-terminal productions

  production(:postfix) do |input, data, callback|
- +
- +

Push result onto input stack, as the diff production can have some number of postfix values that are applied recursively

@@ -974,10 +942,10 @@

Non-terminal productions

end - +
- +

Production for end of primary non-terminal. Places :primary on the stack

@@ -1001,10 +969,10 @@

Non-terminal productions

end - +
- +

Production for end of pass non-terminal.

@@ -1038,10 +1006,10 @@

Parser invocation.

  def initialize(input, **options, &block)
- +
- +

Read input, if necessary, which will be used in a Scanner which feeds the Lexer.

@@ -1049,10 +1017,10 @@

Parser invocation.

    @input = input.respond_to?(:read) ? input.read : input.to_s
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -1074,27 +1042,27 @@

Parser invocation.

**options ) do |context, *data| rule = case context - when :terminal + when :terminals - +
- +

After parsing @terminals: this changes the state of the parser to treat subsequent rules as terminals.

        parsing_terminals = true
-        next
+        rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
       when :pass
- +
- +

After parsing @pass: this defines a specific rule for whitespace.

@@ -1104,10 +1072,10 @@

Parser invocation.

when :rule - +
- +

A rule which has already been turned into a Rule object.

@@ -1122,10 +1090,10 @@

Parser invocation.

end - +
- +

Output formatted S-Expression of grammar

@@ -1134,10 +1102,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP) - +
- +

Output rules as a formatted S-Expression

diff --git a/examples/ebnf-ll1-parser/meta.rb b/examples/ebnf-ll1-parser/meta.rb index 7cd0756..a0e98c5 100644 --- a/examples/ebnf-ll1-parser/meta.rb +++ b/examples/ebnf-ll1-parser/meta.rb @@ -5,9 +5,7 @@ module EBNFParserMeta BRANCH = { :alt => { "(" => [:seq, :_alt_1], - :ENUM => [:seq, :_alt_1], :HEX => [:seq, :_alt_1], - :O_ENUM => [:seq, :_alt_1], :O_RANGE => [:seq, :_alt_1], :RANGE => [:seq, :_alt_1], :STRING1 => [:seq, :_alt_1], @@ -33,9 +31,7 @@ module EBNFParserMeta }, :diff => { "(" => [:postfix, :_diff_1], - :ENUM => [:postfix, :_diff_1], :HEX => [:postfix, :_diff_1], - :O_ENUM => [:postfix, :_diff_1], :O_RANGE => [:postfix, :_diff_1], :RANGE => [:postfix, :_diff_1], :STRING1 => [:postfix, :_diff_1], @@ -48,10 +44,8 @@ module EBNFParserMeta "-" => [:_diff_2], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :RANGE => [], :STRING1 => [], @@ -79,9 +73,7 @@ module EBNFParserMeta }, :expression => { "(" => [:alt], - :ENUM => [:alt], :HEX => [:alt], - :O_ENUM => [:alt], :O_RANGE => [:alt], :RANGE => [:alt], :STRING1 => [:alt], @@ -93,9 +85,7 @@ module EBNFParserMeta }, :postfix => { "(" => [:primary, :_postfix_1], - :ENUM => [:primary, :_postfix_1], :HEX => [:primary, :_postfix_1], - :O_ENUM => [:primary, :_postfix_1], :O_RANGE => [:primary, :_postfix_1], :RANGE => [:primary, :_postfix_1], :STRING1 => [:primary, :_postfix_1], @@ -108,10 +98,8 @@ module EBNFParserMeta "-" => [], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :POSTFIX => [:POSTFIX], :RANGE => [], @@ -122,9 +110,7 @@ module EBNFParserMeta }, :primary => { "(" => [:_primary_1], - :ENUM => [:ENUM], :HEX => [:HEX], - :O_ENUM => [:O_ENUM], :O_RANGE => [:O_RANGE], :RANGE => [:RANGE], :STRING1 => [:STRING1], @@ -139,9 +125,7 @@ module EBNFParserMeta }, :seq => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => 
[:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -153,10 +137,8 @@ module EBNFParserMeta ")" => [], "@pass" => [], "@terminals" => [], - :ENUM => [:_seq_2], :HEX => [:_seq_2], :LHS => [], - :O_ENUM => [:_seq_2], :O_RANGE => [:_seq_2], :RANGE => [:_seq_2], :STRING1 => [:_seq_2], @@ -166,9 +148,7 @@ module EBNFParserMeta }, :_seq_2 => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => [:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -182,10 +162,8 @@ module EBNFParserMeta "-", "@pass", "@terminals", - :ENUM, :HEX, :LHS, - :O_ENUM, :O_RANGE, :POSTFIX, :RANGE, @@ -198,8 +176,6 @@ module EBNFParserMeta :alt => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -221,8 +197,6 @@ module EBNFParserMeta :_alt_6 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -234,8 +208,6 @@ module EBNFParserMeta :diff => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -252,8 +224,6 @@ module EBNFParserMeta :_diff_4 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -282,8 +252,6 @@ module EBNFParserMeta :expression => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -294,8 +262,6 @@ module EBNFParserMeta :_pass_1 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -304,8 +270,6 @@ module EBNFParserMeta :postfix => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -320,8 +284,6 @@ module EBNFParserMeta :primary => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -332,8 +294,6 @@ module EBNFParserMeta :_primary_2 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -346,8 +306,6 @@ module EBNFParserMeta :_rule_1 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -356,8 +314,6 @@ module EBNFParserMeta :seq => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, 
@@ -367,8 +323,6 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -377,8 +331,6 @@ module EBNFParserMeta :_seq_2 => [ :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -388,8 +340,6 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -399,8 +349,6 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -463,8 +411,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -479,8 +425,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -495,8 +439,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -511,8 +453,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -527,8 +467,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -571,8 +509,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -588,8 +524,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -605,8 +539,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -623,8 +555,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -641,8 +571,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -659,8 +587,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, @@ -677,8 +603,6 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :ENUM, - :O_ENUM, :RANGE, :O_RANGE, :STRING1, diff --git a/examples/ebnf-ll1-parser/parser.rb b/examples/ebnf-ll1-parser/parser.rb index 2176c6c..e83555a 100644 --- a/examples/ebnf-ll1-parser/parser.rb +++ b/examples/ebnf-ll1-parser/parser.rb @@ 
-70,30 +70,16 @@ def inspect input[:terminal] = [:hex, token.value] end - # Terminal for `ENUM` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. - # - # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS - terminal(:ENUM, ENUM, unescape: true) do |prod, token, input| - input[:terminal] = [:range, token.value[1..-2]] - end - - # Terminal for `O_ENUM` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. - # - # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - terminal(:O_ENUM, O_ENUM, unescape: true) do |prod, token, input| - input[:terminal] = [:range, token.value[1..-2]] - end - # Terminal for `RANGE` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. # - # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [14] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:RANGE, RANGE, unescape: true) do |prod, token, input| input[:terminal] = [:range, token.value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. Unescape the values to remove EBNF escapes in the input. 
# - # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [15] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:O_RANGE, O_RANGE, unescape: true) do |prod, token, input| input[:terminal] = [:range, token.value[1..-2]] end @@ -102,14 +88,14 @@ def inspect # Match double quote string # - # [18] STRING1 ::= '"' (CHAR - '"')* '"' + # [16] STRING1 ::= '"' (CHAR - '"')* '"' terminal(:STRING1, STRING1, unescape: true) do |prod, token, input| input[:terminal] = token.value[1..-2] end # Match single quote string # - # [19] STRING2 ::= "'" (CHAR - "'")* "'" + # [17] STRING2 ::= "'" (CHAR - "'")* "'" terminal(:STRING2, STRING2, unescape: true) do |prod, token, input| input[:terminal] = token.value[1..-2] end @@ -118,7 +104,7 @@ def inspect # Match `POSTFIX` terminal # - # [22] POSTFIX ::= [?*+] + # [20] POSTFIX ::= [?*+] terminal(:POSTFIX, POSTFIX) do |prod, token, input| input[:postfix] = token.value end diff --git a/examples/ebnf-peg-parser/doc/parser.html b/examples/ebnf-peg-parser/doc/parser.html index cefa617..02a18f0 100644 --- a/examples/ebnf-peg-parser/doc/parser.html +++ b/examples/ebnf-peg-parser/doc/parser.html @@ -562,7 +562,9 @@

Terminals

-
  terminal(:HEX, HEX)
+
  terminal(:HEX, HEX) do |value|
+    [:hex, value]
+  end
@@ -570,41 +572,9 @@

Terminals

-

Terminal for ENUM is matched as part of a primary rule.

- -
[14] ENUM       ::= ('[' R_CHAR+ | HEX+ ']') - LHS
-
- - -
  terminal(:ENUM, ENUM) do |value|
-    [:range, value[1..-2]]
-  end
- - - - -
- -
-

Terminal for O_ENUM is matched as part of a primary rule.

- -
[15] O_ENUM     ::= '[^' R_CHAR+ | HEX+ ']'
-
- - -
  terminal(:O_ENUM, O_ENUM) do |value|
-    [:range, value[1..-2]]
-  end
- - - - -
- -

Terminal for RANGE is matched as part of a primary rule.

-
[16] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
+
[14] `RANGE`      ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -613,14 +583,14 @@

Terminals

end
- +
- +

Terminal for O_RANGE is matched as part of a primary rule.

-
[17] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
+
[15] O_RANGE    ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']'
 
@@ -629,14 +599,14 @@

Terminals

end
- +
- +

Match double quote string

-
[18] STRING1    ::= '"' (CHAR - '"')* '"'
+
[16] STRING1    ::= '"' (CHAR - '"')* '"'
 
@@ -645,14 +615,14 @@

Terminals

end
- +
- +

Match single quote string

-
[19] STRING2    ::= "'" (CHAR - "'")* "'"
+
[17] STRING2    ::= "'" (CHAR - "'")* "'"
 
@@ -661,10 +631,10 @@

Terminals

end
- +
- +

The CHAR and R_CHAR productions are not used explicitly

@@ -672,24 +642,24 @@

Terminals

- +
- +

Match POSTFIX terminal

-
[22] POSTFIX    ::= [?*+]
+
[20] POSTFIX    ::= [?*+]
 
  terminal(:POSTFIX, POSTFIX)
- +
- +

The PASS productions is not used explicitly

@@ -720,10 +690,10 @@

Non-terminal productions

- +
- +

Production for end of declaration non-terminal.

@@ -740,24 +710,24 @@

Non-terminal productions

  production(:declaration, clear_packrat: true) do |value, data, callback|
- +
- +

value contains a declaration. Invoke callback

-
    callback.call(:terminal) if value == '@terminals'
+        
    callback.call(:terminals) if value == '@terminals'
     nil
   end
- +
- +

Production for end of rule non-terminal.

@@ -775,10 +745,10 @@

Non-terminal productions

production(:rule, clear_packrat: true) do |value, data, callback|
- +
- +

value contains an expression. Invoke callback

@@ -791,10 +761,10 @@

Non-terminal productions

end
- +
- +

Production for end of expression non-terminal. Passes through the optimized value of the alt production as follows:

@@ -813,10 +783,10 @@

Non-terminal productions

end - +
- +

Production for end of alt non-terminal. Passes through the optimized value of the seq production as follows:

@@ -843,10 +813,10 @@

Non-terminal productions

end - +
- +

Production for end of _alt_1 non-terminal. Used to collect the ('|' seq)* portion of the alt non-terminal:

@@ -862,10 +832,10 @@

Non-terminal productions

end - +
- +

Production for end of seq non-terminal. Passes through the optimized value of the diff production as follows:

@@ -887,10 +857,10 @@

Non-terminal productions

end - +
- +

Diff production returns concatenated postfix values

@@ -914,10 +884,10 @@

Non-terminal productions

end - +
- +

Production for end of postfix non-terminal. Either returns the primary production value, or as modified by the postfix.

@@ -937,10 +907,10 @@

Non-terminal productions

production(:postfix) do |value| - +
- +

Push result onto input stack, as the diff production can have some number of postfix values that are applied recursively

@@ -954,10 +924,10 @@

Non-terminal productions

end - +
- +

Production for end of primary non-terminal. Places :primary on the stack

@@ -983,10 +953,10 @@

Non-terminal productions

end - +
- +

Production for end of pass non-terminal.

@@ -997,10 +967,10 @@

Non-terminal productions

  production(:pass) do |value, data, callback|
- +
- +

Invoke callback

@@ -1028,10 +998,10 @@

Parser invocation.

  def initialize(input, **options, &block)
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -1043,10 +1013,10 @@

Parser invocation.

end - +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -1058,10 +1028,10 @@

Parser invocation.

parse(@input, :ebnf, EBNFPegMeta::RULES, - +
- +

Use an optimized Regexp for whitespace

@@ -1070,27 +1040,27 @@

Parser invocation.

**options ) do |context, *data| rule = case context - when :terminal + when :terminals - +
- +

After parsing @terminals This changes the state of the parser to treat subsequent rules as terminals.

        parsing_terminals = true
-        next
+        rule = EBNF::Rule.new(nil, nil, data.first, kind: :terminals)
       when :pass
- +
- +

After parsing @pass This defines a specific rule for whitespace.

@@ -1100,10 +1070,10 @@

Parser invocation.

when :rule - +
- +

A rule which has already been turned into a Rule object.

@@ -1118,10 +1088,10 @@

Parser invocation.

end - +
- +

Output formatted S-Expression of grammar

@@ -1130,10 +1100,10 @@

Parser invocation.

require 'sxp' unless defined?(SXP) - +
- +

Output rules as a formatted S-Expression

diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index 2aaa876..2969f09 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -16,9 +16,10 @@ module EBNFPegMeta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), @@ -35,62 +36,54 @@ module EBNFPegMeta EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_1, "14.1", [:alt, :_ENUM_2, :_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_2, "14.2", [:seq, "[", :_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_3, "14.3", [:seq, :_ENUM_5, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_ENUM, "15", [:alt, :_O_ENUM_1, :_O_ENUM_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_1, "15.1", [:seq, "[^", :_O_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:plus, :_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:alt, :_RANGE_3, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_4, "16.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:plus, :_O_RANGE_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:alt, :_O_RANGE_3, :_O_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_4, "17.4", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: 
:terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING2, "19", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_1, "19.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_2, "19.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:CHAR, "20", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_1, "20.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x00-#x20"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#x0A#x0Dx"], kind: :terminal).extend(EBNF::PEG::Rule), - 
EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_12, "23.12", [:opt, :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_14, "23.14", [:seq, "*", :_PASS_15], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_15, "23.15", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_13, "23.13", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_5, "23.5", [:seq, "(*", :_PASS_16, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_16, "23.16", [:star, :_PASS_17], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_17, "23.17", [:alt, :_PASS_18, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_18, "23.18", [:opt, :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "14", [:seq, "[", :_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "14.2", 
[:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:R_CHAR, "19", 
[:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_13, "21.13", [:seq, "*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: 
:terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), ] end diff --git a/examples/ebnf-peg-parser/parser.rb b/examples/ebnf-peg-parser/parser.rb index ce75ff8..0088051 100644 --- a/examples/ebnf-peg-parser/parser.rb +++ b/examples/ebnf-peg-parser/parser.rb @@ -51,44 +51,30 @@ class EBNFPegParser [:hex, value] end - # Terminal for `ENUM` is matched as part of a `primary` rule. - # - # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS - terminal(:ENUM, ENUM) do |value| - [:range, value[1..-2]] - end - - # Terminal for `O_ENUM` is matched as part of a `primary` rule. - # - # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - terminal(:O_ENUM, O_ENUM) do |value| - [:range, value[1..-2]] - end - # Terminal for `RANGE` is matched as part of a `primary` rule. # - # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [14] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:RANGE, RANGE) do |value| [:range, value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. 
# - # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [15] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' terminal(:O_RANGE, O_RANGE) do |value| [:range, value[1..-2]] end # Match double quote string # - # [18] STRING1 ::= '"' (CHAR - '"')* '"' + # [16] STRING1 ::= '"' (CHAR - '"')* '"' terminal(:STRING1, STRING1) do |value| value[1..-2] end # Match single quote string # - # [19] STRING2 ::= "'" (CHAR - "'")* "'" + # [17] STRING2 ::= "'" (CHAR - "'")* "'" terminal(:STRING2, STRING2) do |value| value[1..-2] end @@ -97,7 +83,7 @@ class EBNFPegParser # Match `POSTFIX` terminal # - # [22] POSTFIX ::= [?*+] + # [20] POSTFIX ::= [?*+] terminal(:POSTFIX, POSTFIX) # The `PASS` productions is not used explicitly diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 2c685c2..243e1df 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -112,7 +112,6 @@ class Base def initialize(input, format: :ebnf, **options) @options = options.dup @lineno, @depth, @errors = 1, 0, [] - terminal = false @ast = [] input = input.respond_to?(:read) ? 
input.read : input.to_s @@ -128,6 +127,7 @@ def initialize(input, format: :ebnf, **options) iso = ISOEBNF.new(input, **options) @ast = iso.ast when :native + terminals = false scanner = StringScanner.new(input) eachRule(scanner) do |r| @@ -135,7 +135,9 @@ def initialize(input, format: :ebnf, **options) case r when /^@terminals/ # Switch mode to parsing terminals - terminal = true + terminals = true + rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self) + @ast << rule when /^@pass\s*(.*)$/m expr = expression($1).first rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self) @@ -144,7 +146,7 @@ def initialize(input, format: :ebnf, **options) else rule = depth {ruleParts(r)} - rule.kind = :terminal if terminal # Override after we've parsed @terminals + rule.kind = :terminal if terminals # Override after we've parsed @terminals rule.orig = r @ast << rule end diff --git a/lib/ebnf/ebnf/meta.rb b/lib/ebnf/ebnf/meta.rb index 209136a..03a002c 100644 --- a/lib/ebnf/ebnf/meta.rb +++ b/lib/ebnf/ebnf/meta.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from ../etc/ebnf.ebnf +# Derived from etc/ebnf.ebnf module EBNFMeta RULES = [ EBNF::Rule.new(:ebnf, "1", [:star, :_ebnf_1]).extend(EBNF::PEG::Rule), @@ -16,7 +16,7 @@ module EBNFMeta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), 
EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), @@ -35,60 +35,54 @@ module EBNFMeta EBNF::Rule.new(:_HEX_3, "13.3", [:range, "a-f"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_4, "13.4", [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_HEX_5, "13.5", [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:ENUM, "14", [:diff, :_ENUM_1, :LHS], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_1, "14.1", [:alt, :_ENUM_2, :_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_2, "14.2", [:seq, "[", :_ENUM_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_4, "14.4", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_3, "14.3", [:seq, :_ENUM_5, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_ENUM_5, "14.5", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_ENUM, "15", [:alt, :_O_ENUM_1, :_O_ENUM_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_1, "15.1", [:seq, "[^", :_O_ENUM_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_3, "15.3", [:plus, :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_2, "15.2", [:seq, :_O_ENUM_4, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_ENUM_4, "15.4", [:plus, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:RANGE, "16", [:seq, "[", :_RANGE_1, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_1, "16.1", [:alt, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_2, "16.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_RANGE_3, "16.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:O_RANGE, "17", [:seq, "[^", :_O_RANGE_1, 
"]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_1, "17.1", [:alt, :_O_RANGE_2, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_2, "17.2", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_O_RANGE_3, "17.3", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING1, "18", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_1, "18.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING1_2, "18.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:STRING2, "19", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_1, "19.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_STRING2_2, "19.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:CHAR, "20", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_1, "20.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_2, "20.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_3, "20.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_CHAR_4, "20.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:R_CHAR, "21", [:diff, :CHAR, "]"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:POSTFIX, "22", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:PASS, "23", [:plus, :_PASS_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_1, "23.1", [:alt, :_PASS_2, :_PASS_3, :_PASS_4, :_PASS_5], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_2, "23.2", [:range, "#x9#xA#xD#x20"], 
kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_3, "23.3", [:seq, :_PASS_6, :_PASS_7], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_6, "23.6", [:alt, :_PASS_8, "//"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_8, "23.8", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_7, "23.7", [:star, :_PASS_9], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_9, "23.9", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_4, "23.4", [:seq, "/*", :_PASS_10, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_10, "23.10", [:star, :_PASS_11], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_11, "23.11", [:alt, :_PASS_12, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_12, "23.12", [:opt, :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_14, "23.14", [:seq, "*", :_PASS_15], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_15, "23.15", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_13, "23.13", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_5, "23.5", [:seq, "(*", :_PASS_16, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_16, "23.16", [:star, :_PASS_17], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_17, "23.17", [:alt, :_PASS_18, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_18, "23.18", [:opt, :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_20, "23.20", [:seq, "*", :_PASS_21], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_21, "23.21", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_PASS_19, "23.19", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:RANGE, "14", [:seq, "[", 
:_RANGE_1, :_RANGE_2, :_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_1, "14.1", [:plus, :_RANGE_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_4, "14.4", [:alt, :_RANGE_5, :_RANGE_6, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_5, "14.5", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_6, "14.6", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_2, "14.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_RANGE_3, "14.3", [:diff, "]", :LHS], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:O_RANGE, "15", [:seq, "[^", :_O_RANGE_1, :_O_RANGE_2, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_1, "15.1", [:plus, :_O_RANGE_3], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_3, "15.3", [:alt, :_O_RANGE_4, :_O_RANGE_5, :R_CHAR, :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_4, "15.4", [:seq, :R_CHAR, "-", :R_CHAR], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_5, "15.5", [:seq, :HEX, "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_O_RANGE_2, "15.2", [:opt, "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING1, "16", [:seq, "\"", :_STRING1_1, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_1, "16.1", [:star, :_STRING1_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING1_2, "16.2", [:diff, :CHAR, "\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:STRING2, "17", [:seq, "'", :_STRING2_1, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_1, "17.1", [:star, :_STRING2_2], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_STRING2_2, "17.2", [:diff, :CHAR, "'"], kind: :terminal).extend(EBNF::PEG::Rule), + 
EBNF::Rule.new(:CHAR, "18", [:alt, :_CHAR_1, :_CHAR_2, :_CHAR_3, :_CHAR_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_1, "18.1", [:range, "#x9#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_2, "18.2", [:range, "#x20-#xD7FF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_2, "21.2", [:seq, :_PASS_5, :_PASS_6], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_5, "21.5", [:alt, :_PASS_7, "//"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_7, "21.7", [:diff, "#", "#x"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_6, "21.6", [:star, :_PASS_8], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_8, "21.8", [:range, "^#xA#xD"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_3, "21.3", [:seq, "/*", :_PASS_9, "*/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_9, "21.9", [:star, :_PASS_10], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_10, "21.10", [:alt, :_PASS_11, :_PASS_12], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_11, "21.11", [:opt, :_PASS_13], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_13, "21.13", [:seq, 
"*", :_PASS_14], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_14, "21.14", [:range, "^/"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_12, "21.12", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_4, "21.4", [:seq, "(*", :_PASS_15, "*)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_15, "21.15", [:star, :_PASS_16], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_16, "21.16", [:alt, :_PASS_17, :_PASS_18], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_17, "21.17", [:opt, :_PASS_19], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_19, "21.19", [:seq, "*", :_PASS_20], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_20, "21.20", [:range, "^)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_PASS_18, "21.18", [:range, "^*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:seq, :PASS], kind: :pass).extend(EBNF::PEG::Rule), ] end diff --git a/lib/ebnf/ll1.rb b/lib/ebnf/ll1.rb index 17ec82d..34214e7 100644 --- a/lib/ebnf/ll1.rb +++ b/lib/ebnf/ll1.rb @@ -9,9 +9,7 @@ module EBNF # BRANCH = { # :alt => { # "(" => [:seq, :_alt_1], - # :ENUM => [:seq, :_alt_1], # :HEX => [:seq, :_alt_1], - # :O_ENUM => [:seq, :_alt_1], # :O_RANGE => [:seq, :_alt_1], # :RANGE => [:seq, :_alt_1], # :STRING1 => [:seq, :_alt_1], @@ -38,8 +36,6 @@ module EBNF # :alt => [ # :HEX, # :SYMBOL, - # :ENUM, - # :O_ENUM, # :RANGE, # :O_RANGE, # :STRING1, @@ -54,7 +50,7 @@ module EBNF # # TERMINALS = ["(", ")", "-", # "@pass", "@terminals", - # :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX, + # :HEX, :LHS, :O_RANGE,:POSTFIX, # :RANGE, :STRING1, :STRING2, :SYMBOL,"|" # ].freeze # diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index 78e5b31..c475a10 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -45,44 +45,30 @@ class Parser [:hex, value] end - # Terminal for `ENUM` is matched as part of 
a `primary` rule. - # - # [14] ENUM ::= ('[' R_CHAR+ | HEX+ ']') - LHS - terminal(:ENUM, ENUM) do |value| - [:range, value[1..-2]] - end - - # Terminal for `O_ENUM` is matched as part of a `primary` rule. - # - # [15] O_ENUM ::= '[^' R_CHAR+ | HEX+ ']' - terminal(:O_ENUM, O_ENUM) do |value| - [:range, value[1..-2]] - end - # Terminal for `RANGE` is matched as part of a `primary` rule. # - # [16] `RANGE` ::= '[' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [14] RANGE ::= '[' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' - LHS terminal(:RANGE, RANGE) do |value| [:range, value[1..-2]] end # Terminal for `O_RANGE` is matched as part of a `primary` rule. # - # [17] O_RANGE ::= '[^' (R_CHAR '-' R_CHAR) | (HEX '-' HEX) ']' + # [15] O_RANGE ::= '[^' ((R_CHAR '-' R_CHAR) | (HEX '-' HEX) | R_CHAR | HEX)+ '-'? ']' terminal(:O_RANGE, O_RANGE) do |value| [:range, value[1..-2]] end # Match double quote string # - # [18] STRING1 ::= '"' (CHAR - '"')* '"' + # [16] STRING1 ::= '"' (CHAR - '"')* '"' terminal(:STRING1, STRING1) do |value| value[1..-2] end # Match single quote string # - # [19] STRING2 ::= "'" (CHAR - "'")* "'" + # [17] STRING2 ::= "'" (CHAR - "'")* "'" terminal(:STRING2, STRING2) do |value| value[1..-2] end @@ -91,7 +77,7 @@ class Parser # Match `POSTFIX` terminal # - # [22] POSTFIX ::= [?*+] + # [20] POSTFIX ::= [?*+] terminal(:POSTFIX, POSTFIX) # The `PASS` productions is not used explicitly @@ -252,9 +238,7 @@ class Parser # [9] primary ::= HEX # | SYMBOL # | RANGE - # | ENUM # | O_RANGE - # | O_ENUM # | STRING1 # | STRING2 # | '(' expression ')' diff --git a/lib/ebnf/rule.rb b/lib/ebnf/rule.rb index e215111..f01e9c5 100644 --- a/lib/ebnf/rule.rb +++ b/lib/ebnf/rule.rb @@ -150,7 +150,7 @@ def initialize(sym, id, expr, kind: nil, ebnf: nil, first: nil, follow: nil, sta # @example inputs # (pass _pass (plus (range "#x20\\t\\r\\n"))) # (rule ebnf "1" (star (alt declaration rule))) - # (terminal O_ENUM "17" (seq "[^" (plus CHAR) "]")) + # (terminal 
R_CHAR "19" (diff CHAR (alt "]" "-"))) # # Also may have `(first ...)`, `(follow ...)`, or `(start #t)`. # @@ -589,11 +589,28 @@ def validate!(ast, expr = @expr) str = str[1..-1] if str.start_with?('^') str = str[0..-2] if str.end_with?('-') # Allowed at end of range scanner = StringScanner.new(str) + hex = rchar = in_range = false while !scanner.eos? - scanner.scan(/#{Terminals::HEX}-#{Terminals::HEX}/) || - scanner.scan(/#{Terminals::R_CHAR}-#{Terminals::R_CHAR}/) || - scanner.scan(/#{Terminals::HEX}|#{Terminals::R_CHAR}/) || - raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}") + begin + if scanner.scan(Terminals::HEX) + raise SyntaxError if in_range && rchar + rchar = in_range = false + hex = true + elsif scanner.scan(Terminals::R_CHAR) + raise SyntaxError if in_range && hex + hex = in_range = false + rchar = true + else + raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}") + end + + if scanner.scan(/\-/) + raise SyntaxError if in_range + in_range = true + end + rescue SyntaxError + raise(SyntaxError, "Range contains illegal components at offset #{scanner.pos}: was #{expr.last}") + end end else ([:alt, :diff].include?(expr.first) ? 
expr[1..-1] : expr[1,1]).each do |sym| diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index cf047a2..d35c3e4 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -6,12 +6,9 @@ module EBNF::Terminals HEX = %r(\#x\h+)u.freeze CHAR = %r([\u0009\u000A\u000D\u0020-\uD7FF\u{10000}-\u{10FFFF}])u.freeze R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze - RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze - ENUM_BASE = %r(\[(?:(?:#{R_CHAR})+|(?:#{HEX})+)-?\])u.freeze - ENUM = %r(#{ENUM_BASE}(?!\s+#{SYMBOL_BASE}\s*::=))u.freeze + RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze - O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}-#{HEX}))+\])u.freeze - O_ENUM = %r(\[^#{ENUM_BASE}\])u.freeze + O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze POSTFIX = %r([?*+])u.freeze diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 6735ce6..b649b20 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -20,14 +20,12 @@ %{ [9] primary ::= HEX | RANGE - | ENUM | O_RANGE - | O_ENUM | STRING1 | STRING2 | '(' expression ')' - } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))}, + } => %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))}, %{#[1] rule ::= 'FOO'} => %{()}, %{//[1] rule ::= 'FOO'} => %{()}, %{[18] SolutionModifier ::= _SolutionModifier_1 _SolutionModifier_2} => diff --git a/spec/bnf_spec.rb b/spec/bnf_spec.rb index 4fe65ab..137cfbb 100644 --- a/spec/bnf_spec.rb +++ b/spec/bnf_spec.rb @@ -16,16 +16,14 @@ %{ [9] primary ::= HEX | 
RANGE - | ENUM | O_RANGE - | O_ENUM | STRING1 | STRING2 | '(' expression ')' } => %{((rule _empty "0" (seq)) - (rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 _primary_1 )) + (rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1 )) (rule _primary_1 "9.1" (seq "(" expression ")")))}, %{ [1] R1 ::= 1 2 diff --git a/spec/ebnf_spec.rb b/spec/ebnf_spec.rb index bb85579..1664976 100644 --- a/spec/ebnf_spec.rb +++ b/spec/ebnf_spec.rb @@ -17,23 +17,19 @@ %{ [9] primary ::= HEX | RANGE - | ENUM | O_RANGE - | O_ENUM | STRING1 | STRING2 | '(' expression ')' - } => %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))}, + } => %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))}, %{ primary ::= HEX | RANGE - | ENUM | O_RANGE - | O_ENUM | STRING1 | STRING2 | '(' expression ')' - } => %{((rule primary (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 (seq "(" expression ")"))))}, + } => %{((rule primary (alt HEX RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))))}, }.each do |input, expected| context input do subject {EBNF.parse(input)} diff --git a/spec/ll1/data/meta.rb b/spec/ll1/data/meta.rb index e143b86..96a33a9 100644 --- a/spec/ll1/data/meta.rb +++ b/spec/ll1/data/meta.rb @@ -1,14 +1,11 @@ -# This file is automatically generated by /Users/gregg/Projects/ebnf/bin/ebnf -# BRANCH derived from ../../etc/ebnf.ebnf +# This file is automatically generated by ebnf version 2.0.0 +# Derived from etc/ebnf.ebnf module EBNFParserMeta START = :ebnf - BRANCH = { :alt => { "(" => [:seq, :_alt_1], - :ENUM => [:seq, :_alt_1], :HEX => [:seq, :_alt_1], - :O_ENUM => [:seq, :_alt_1], :O_RANGE => [:seq, :_alt_1], :RANGE => [:seq, :_alt_1], :STRING1 => [:seq, :_alt_1], @@ -34,9 +31,7 @@ module EBNFParserMeta }, :diff => { "(" => [:postfix, :_diff_1], - :ENUM => [:postfix, :_diff_1], :HEX => [:postfix, :_diff_1], - :O_ENUM => [:postfix, :_diff_1], :O_RANGE => [:postfix, 
:_diff_1], :RANGE => [:postfix, :_diff_1], :STRING1 => [:postfix, :_diff_1], @@ -49,10 +44,8 @@ module EBNFParserMeta "-" => [:_diff_2], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :RANGE => [], :STRING1 => [], @@ -80,9 +73,7 @@ module EBNFParserMeta }, :expression => { "(" => [:alt], - :ENUM => [:alt], :HEX => [:alt], - :O_ENUM => [:alt], :O_RANGE => [:alt], :RANGE => [:alt], :STRING1 => [:alt], @@ -94,9 +85,7 @@ module EBNFParserMeta }, :postfix => { "(" => [:primary, :_postfix_1], - :ENUM => [:primary, :_postfix_1], :HEX => [:primary, :_postfix_1], - :O_ENUM => [:primary, :_postfix_1], :O_RANGE => [:primary, :_postfix_1], :RANGE => [:primary, :_postfix_1], :STRING1 => [:primary, :_postfix_1], @@ -109,10 +98,8 @@ module EBNFParserMeta "-" => [], "@pass" => [], "@terminals" => [], - :ENUM => [], :HEX => [], :LHS => [], - :O_ENUM => [], :O_RANGE => [], :POSTFIX => [:POSTFIX], :RANGE => [], @@ -123,9 +110,7 @@ module EBNFParserMeta }, :primary => { "(" => [:_primary_1], - :ENUM => [:ENUM], :HEX => [:HEX], - :O_ENUM => [:O_ENUM], :O_RANGE => [:O_RANGE], :RANGE => [:RANGE], :STRING1 => [:STRING1], @@ -140,9 +125,7 @@ module EBNFParserMeta }, :seq => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => [:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -154,10 +137,8 @@ module EBNFParserMeta ")" => [], "@pass" => [], "@terminals" => [], - :ENUM => [:_seq_2], :HEX => [:_seq_2], :LHS => [], - :O_ENUM => [:_seq_2], :O_RANGE => [:_seq_2], :RANGE => [:_seq_2], :STRING1 => [:_seq_2], @@ -167,9 +148,7 @@ module EBNFParserMeta }, :_seq_2 => { "(" => [:diff, :_seq_1], - :ENUM => [:diff, :_seq_1], :HEX => [:diff, :_seq_1], - :O_ENUM => [:diff, :_seq_1], :O_RANGE => [:diff, :_seq_1], :RANGE => [:diff, :_seq_1], :STRING1 => [:diff, :_seq_1], @@ -183,10 +162,8 @@ module EBNFParserMeta "-", "@pass", "@terminals", - :ENUM, 
:HEX, :LHS, - :O_ENUM, :O_RANGE, :POSTFIX, :RANGE, @@ -200,9 +177,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -223,9 +198,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -236,9 +209,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -254,9 +225,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -284,9 +253,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -296,9 +263,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -306,9 +271,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -322,9 +285,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -334,9 +295,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -348,9 +307,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -358,9 +315,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -369,9 +324,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -379,9 +332,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -390,9 +341,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -401,9 +350,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "("], @@ -465,9 +412,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ 
-481,9 +426,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -497,9 +440,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -513,9 +454,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -529,9 +468,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -573,9 +510,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -590,9 +525,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -607,9 +540,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -625,9 +556,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -643,9 +572,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -661,9 +588,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -679,9 +604,7 @@ module EBNFParserMeta :HEX, :SYMBOL, :RANGE, - :ENUM, :O_RANGE, - :O_ENUM, :STRING1, :STRING2, "@terminals", @@ -734,6 +657,17 @@ module EBNFParserMeta :LHS, "@pass"], }.freeze + CLEANUP = { + :_alt_1 => :star, + :_alt_3 => :merge, + :_diff_1 => :opt, + :ebnf => :star, + :_ebnf_2 => :merge, + :_postfix_1 => :opt, + :seq => :plus, + :_seq_1 => :star, + :_seq_2 => :merge, + }.freeze PASS = [ :PASS ].freeze diff --git a/spec/ll1/data/parser.rb b/spec/ll1/data/parser.rb index 29e166d..e9a0079 100644 --- a/spec/ll1/data/parser.rb +++ b/spec/ll1/data/parser.rb @@ -29,14 +29,6 @@ class EBNFParser input[:terminal] = token.value end - terminal(:ENUM, ENUM, unescape: true) do |prod, token, 
input| - input[:terminal] = [:range, token.value[1..-2]] - end - - terminal(:O_ENUM, O_ENUM, unescape: true) do |prod, token, input| - input[:terminal] = [:range, token.value[1..-2]] - end - terminal(:RANGE, RANGE, unescape: true) do |prod, token, input| input[:terminal] = [:range, token.value[1..-2]] end diff --git a/spec/peg/data/parser.rb b/spec/peg/data/parser.rb index 687cdb8..b15964b 100644 --- a/spec/peg/data/parser.rb +++ b/spec/peg/data/parser.rb @@ -26,14 +26,6 @@ class EBNFPegParser terminal(:HEX, HEX) - terminal(:ENUM, ENUM, unescape: true) do |value| - [:range, value[1..-2]] - end - - terminal(:O_ENUM, O_ENUM, unescape: true) do |value| - [:range, value[1..-2]] - end - terminal(:RANGE, RANGE, unescape: true) do |value| [:range, value[1..-2]] end diff --git a/spec/peg_spec.rb b/spec/peg_spec.rb index e940757..a354203 100644 --- a/spec/peg_spec.rb +++ b/spec/peg_spec.rb @@ -14,15 +14,13 @@ %{ [9] primary ::= HEX | RANGE - | ENUM | O_RANGE - | O_ENUM | STRING1 | STRING2 | '(' expression ')' } => - %{((rule primary "9" (alt HEX RANGE ENUM O_RANGE O_ENUM STRING1 STRING2 _primary_1)) + %{((rule primary "9" (alt HEX RANGE O_RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (seq "(" expression ")")))}, %{[1] start ::= A B C} => %{((rule start "1" (seq A B C)))}, diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 8bb6a32..0bca146 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -24,10 +24,6 @@ %{(pass _pass (plus (range "#x9#xA#xD#x20")))}, EBNF::Rule.new(nil, nil, [:plus, [:range, "#x9#xA#xD#x20"]], kind: :pass) ], - "terminal": [ - %{(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))}, - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]) - ], "alt": [ %{(rule alt (alt a b c))}, EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule) @@ -120,10 +116,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), %{(pass _pass (plus (range "#x20\\\\t\\\\r\\\\n")))}, ], - "terminal": [ - 
EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - %{(terminal O_ENUM "17" (seq "[^" (plus CHAR) "]"))}, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), %{(rule alt (alt a b c))}, @@ -190,13 +182,6 @@ :_pass rdfs:label "_pass"; g:plus [ re:matches "[\\\\u0020\\\\t\\\\r\\\\n]" ] .}, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - %{ - :O_ENUM rdfs:label "O_ENUM"; - dc:identifier "17"; - re:seq ( "[^" [ re:plus :CHAR ] "]" ) .}, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), %{ @@ -285,10 +270,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), %{EBNF::Rule.new(:_pass, nil, [:plus, [:range, \"#x20\\\\t\\\\r\\\\n\"]], kind: :pass)}, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - %{EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"], kind: :terminal)}, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), %{EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c])}, @@ -532,10 +513,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), false, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - true, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), false, @@ -593,10 +570,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), true, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - false, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), false, @@ -654,10 +627,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), false, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - false, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), true, @@ -715,10 +684,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], 
kind: :pass), false, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - false, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), true, @@ -776,10 +741,6 @@ EBNF::Rule.new(nil, nil, [:plus, [:range, "#x20\\t\\r\\n"]], kind: :pass), false, ], - "terminal": [ - EBNF::Rule.new(:O_ENUM, "17", [:seq, "[^", [:plus, :CHAR], "]"]), - true, - ], "alt": [ EBNF::Rule.new(:alt, nil, [:alt, :a, :b, :c], kind: :rule), false, @@ -883,8 +844,6 @@ LHS: [], SYMBOL: [], HEX: [], - ENUM: [], - O_ENUM: [], RANGE: [], O_RANGE: [], STRING1: [], @@ -911,19 +870,17 @@ seq: [], diff: [], postfix: [], - primary: [:HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, "("], + primary: [:HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, "("], pass: ["@pass"], LHS: ["["], SYMBOL: ["a-z", "A-Z", "0-9", "_", "."], HEX: ["#x"], - ENUM: ["[", :LHS], - O_ENUM: ["[^"], RANGE: ["["], O_RANGE: ["[^"], STRING1: ['"'], STRING2: ["'"], CHAR: ["#x9#xA#xD", "#x20-#xD7FF", "#xE000-#xFFFD", "#x10000-#x10FFFF"], - R_CHAR: [:CHAR, "]", "-"], + R_CHAR: [:CHAR, "]", "-", :HEX], POSTFIX: ["?*+"], PASS: ["#x9#xA#xD#x20", "#", "#x", "//", "/*", "(*"] }.each do |sym, expected| @@ -944,19 +901,17 @@ seq: [:diff], diff: [:postfix], postfix: [:primary, :POSTFIX], - primary: [:HEX, :SYMBOL, :ENUM, :O_ENUM, :RANGE, :O_RANGE, :STRING1, :STRING2, :expression], + primary: [:HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :expression], pass: [:expression], LHS: [:SYMBOL], SYMBOL: [], HEX: [], - ENUM: [:R_CHAR, :HEX, :LHS], - O_ENUM: [:R_CHAR, :HEX], - RANGE: [:R_CHAR, :HEX], + RANGE: [:R_CHAR, :HEX, :LHS], O_RANGE: [:R_CHAR, :HEX], STRING1: [:CHAR], STRING2: [:CHAR], CHAR: [], - R_CHAR: [:CHAR], + R_CHAR: [:CHAR, :HEX], POSTFIX: [], PASS: [] }.each do |sym, expected| @@ -1001,19 +956,19 @@ ], "mixed range char and hex": [ "a ::= [b-#x20]", - /syntax error/ + /Range contains illegal components/ ], "mixed range char and hex (2)": [ "a ::= 
[#x20-b]", - /syntax error/ + /Range contains illegal components/ ], "incomplete range": [ "a ::= [-b]", - /syntax error/ + /syntax error,/ ], "extra range": [ "a ::= [a-b-c]", - /syntax error/ + /syntax error,/ ], }.each do |name, (rule, message)| it name do From 4e82fd53e5393ddccfe17d15b248db37e8c24da1 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sat, 11 Jul 2020 16:58:00 -0700 Subject: [PATCH 42/50] Remove `(/` and `/)` as alternate start/end option symbols in ISO EBNF due to pathological regular expression matching. Create specs for ISO EBNF parser. --- etc/iso-ebnf.ebnf | 35 +++--- etc/iso-ebnf.isoebnf | 21 +++- etc/iso-ebnf.sxp | 24 ++-- examples/isoebnf/parser.rb | 10 +- lib/ebnf/isoebnf.rb | 8 +- spec/isoebnf_spec.rb | 233 +++++++++++++++++++++++++++++++++++++ 6 files changed, 292 insertions(+), 39 deletions(-) create mode 100644 spec/isoebnf_spec.rb diff --git a/etc/iso-ebnf.ebnf b/etc/iso-ebnf.ebnf index 339e936..7efa4b0 100644 --- a/etc/iso-ebnf.ebnf +++ b/etc/iso-ebnf.ebnf @@ -67,7 +67,10 @@ comment ::= start_comment_symbol comment_symbol* end_comment , , or *) -comment_symbol ::= comment | terminal_string | special_sequence | character +comment_symbol ::= comment | commentless_symbol | other_character + +commentless_symbol ::= terminal_character | meta_identifier | integer + | terminal_string | special_sequence letter ::= [a-zA-Z] # gratuitous comment @@ -111,23 +114,9 @@ gap_separator ::= [#x9#xa#xb#xc#xd#x20] empty ::= '' -# Simple terminals that are often extended -defining_symbol ::= '=' | ':' -definition_separator_symbol ::= '|' | '/' | '!' -terminator_symbol ::= ';' | '.' -start_option_symbol ::= '[' | '(/' -end_option_symbol ::= ']' | '/)' -start_repeat_symbol ::= '{' | '(:' -end_repeat_symbol ::= '}' | ':)' - -# Symbols described, but not actually used. 
- -gap_free_symbol ::= (terminal_character - ['"]) - | terminal_string - +concatenate_symbol ::= ',' repetition_symbol ::= '*' except_symbol ::= '-' -concatenate_symbol ::= ',' first_quote_symbol ::= "'" second_quote_symbol ::= '"' start_comment_symbol ::= '(*' @@ -135,3 +124,17 @@ end_comment_symbol ::= '*)' start_group_symbol ::= '(' end_group_symbol ::= ')' special_sequence_symbol ::= '?' + +# Simple terminals that are often extended +defining_symbol ::= '=' | ':' +definition_separator_symbol ::= '|' | '/' | '!' +terminator_symbol ::= ';' | '.' +start_option_symbol ::= '[' +end_option_symbol ::= ']' +start_repeat_symbol ::= '{' | '(:' +end_repeat_symbol ::= '}' | ':)' + +# Symbols described, but not actually used. + +gap_free_symbol ::= (terminal_character - ['"]) + | terminal_string diff --git a/etc/iso-ebnf.isoebnf b/etc/iso-ebnf.isoebnf index 8bcda08..96c940f 100644 --- a/etc/iso-ebnf.isoebnf +++ b/etc/iso-ebnf.isoebnf @@ -62,7 +62,11 @@ comment = '(*', {comment_symbol}, '*)' , , or *); -comment_symbol = comment | terminal_string | special_sequence | character; +comment_symbol = comment | commentless_symbol | other_character ; + +commentless_symbol = terminal_character | meta_identifier | integer + | terminal_string | special_sequence + ; letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" @@ -113,11 +117,22 @@ other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&' empty = ; +concatenate_symbol = ',' ; +repetition_symbol = '*' ; +except_symbol = '-' ; +first_quote_symbol = "'" ; +second_quote_symbol = '"' ; +start_comment_symbol = '(*' ; +end_comment_symbol = '*)' ; +start_group_symbol = '(' ; +end_group_symbol = ')' ; +special_sequence_symbol = '?' ; + (* Simple terminals that are often extended *) defining_symbol = '=' | ':' ; definition_separator_symbol = '|' | '/' | '!' ; terminator_symbol = ';' | '.' 
; -start_option_symbol = '[' | '(/' ; -end_option_symbol = ']' | '/)' ; +start_option_symbol = '[' ; +end_option_symbol = ']' ; start_repeat_symbol = '{' | '(:' ; end_repeat_symbol = '}' | ':)' ; diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 38f5024..84292e0 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -25,7 +25,9 @@ (terminal integer (plus decimal_digit)) (terminal special_sequence (seq "?" (star special_sequence_character) "?")) (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal comment_symbol (alt comment commentless_symbol other_character)) + (terminal commentless_symbol + (alt terminal_character meta_identifier integer terminal_string special_sequence)) (terminal letter (range "a-zA-Z")) (terminal decimal_digit (range "0-9")) (terminal meta_identifier_character (alt letter decimal_digit "_")) @@ -43,21 +45,21 @@ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) (pass _pass (alt (plus gap_separator) comment)) (terminal empty (seq ())) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal concatenate_symbol (seq ",")) (terminal repetition_symbol (seq "*")) (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) (terminal first_quote_symbol (seq "'")) (terminal second_quote_symbol (seq "\"")) (terminal start_comment_symbol (seq "(*")) (terminal end_comment_symbol (seq "*)")) (terminal start_group_symbol (seq "(")) (terminal end_group_symbol (seq ")")) - (terminal special_sequence_symbol (seq "?"))) + (terminal 
special_sequence_symbol (seq "?")) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (seq "[")) + (terminal end_option_symbol (seq "]")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string))) diff --git a/examples/isoebnf/parser.rb b/examples/isoebnf/parser.rb index 80e5f6a..ea713ca 100644 --- a/examples/isoebnf/parser.rb +++ b/examples/isoebnf/parser.rb @@ -78,16 +78,16 @@ class ISOEBNFPegParser terminal(:empty, //) # `[26] definition_separator_symbol ::= '|' | '/' | '!'` - terminal(:definition_separator_symbol, /[\|\/!]/) + terminal(:definition_separator_symbol, /[\|!]|(?:\/(?<=\)))/) # `[27] terminator_symbol ::= ';' | '.'` terminal(:terminator_symbol, /[;\.]/) - # `[28] start_option_symbol ::= '[' | '(/'` - terminal(:start_option_symbol, /\[|\(\//) + # `[28] start_option_symbol ::= '[' + terminal(:start_option_symbol, /\[|(?:\(\/)/) - # `[29] end_option_symbol ::= ']' | '/)'` - terminal(:end_option_symbol, /[\]\/]/) + # `[29] end_option_symbol ::= ']'` + terminal(:end_option_symbol, /\]/) # `[30] start_repeat_symbol ::= '{' | '(:'` terminal(:start_repeat_symbol, /{|\(:/) diff --git a/lib/ebnf/isoebnf.rb b/lib/ebnf/isoebnf.rb index 36fe4c3..92987dd 100644 --- a/lib/ebnf/isoebnf.rb +++ b/lib/ebnf/isoebnf.rb @@ -79,11 +79,11 @@ class ISOEBNF # `[27] terminator_symbol ::= ';' | '.'` terminal(:terminator_symbol, /[;\.]/) - # `[28] start_option_symbol ::= '[' | '(/'` - terminal(:start_option_symbol, /\[|\(\//) + # `[28] start_option_symbol ::= '[' + terminal(:start_option_symbol, /\[|(?:\(\/)/) - # `[29] end_option_symbol ::= ']' | '/)'` - terminal(:end_option_symbol, /[\]\/]/) + # `[29] end_option_symbol ::= ']'` + terminal(:end_option_symbol, /\]/) # `[30] start_repeat_symbol ::= '{' | '(:'` 
terminal(:start_repeat_symbol, /{|\(:/) diff --git a/spec/isoebnf_spec.rb b/spec/isoebnf_spec.rb new file mode 100644 index 0000000..7f85f60 --- /dev/null +++ b/spec/isoebnf_spec.rb @@ -0,0 +1,233 @@ +# coding: utf-8 +$:.unshift "." +require 'spec_helper' +require 'ebnf' +require 'sxp' + +describe EBNF::ISOEBNF do + let(:logger) {RDF::Spec.logger} + after(:each) do |example| + puts logger.to_s if example.exception && !example.exception.is_a?(RSpec::Expectations::ExpectationNotMetError) + end + + context "rule variations" do + { + "legal meta_identifier": [ + 'rulename = "foo" ;', + %{((rule rulename (seq "foo")))} + ], + "digits": [ + %{ + digit_excluding_zero = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + digit = "0" | digit_excluding_zero ; + }, + %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule digit (alt "0" digit_excluding_zero)))} + ], + "sequence of numbers": [ + %{ + twelve = "1", "2" ; + two_hundred_one = "2", "0", "1" ; + three_hundred_twelve = "3", twelve ; + twelve_thousand_two_hundred_one = twelve, two_hundred_one ; + }, + %{((rule twelve (seq "1" "2")) + (rule two_hundred_one (seq "2" "0" "1")) + (rule three_hundred_twelve (seq "3" twelve)) + (rule twelve_thousand_two_hundred_one (seq twelve two_hundred_one)))} + ], + "natural number": [ + %{natural_number = digit_excluding_zero, { digit } ;}, + %{((rule natural_number (seq digit_excluding_zero (star digit))))} + ], + "integer": [ + %{integer = "0" | [ "-" ], natural_number ;}, + %{((rule integer (alt "0" (seq (opt "-") natural_number))))} + ], + "simple grammar": [ + %q{ + letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" + | "c" | "d" | "e" | "f" | "g" | "h" | "i" + | "j" | "k" | "l" | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" | "u" | "v" | "w" + | "x" | "y" | "z" ; + digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | 
"8" | "9" ; + symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" + | "'" | '"' | "=" | "|" | "." | "," | ";" ; + character = letter | digit | symbol | "_" ; + + identifier = letter , { letter | digit | "_" } ; + terminal = "'" , character , { character } , "'" + | '"' , character , { character } , '"' ; + + lhs = identifier ; + rhs = identifier + | terminal + | "[" , rhs , "]" + | "{" , rhs , "}" + | "(" , rhs , ")" + | rhs , "|" , rhs + | rhs , "," , rhs ; + + rule = lhs , "=" , rhs , ";" ; + grammar = { rule } ; + }, + %q{((rule letter + (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" + "S" "T" "U" "V" "W" "X" "Y" "Z" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" + "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" )) + (rule digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule symbol (alt "[" "]" "{" "}" "(" ")" "<" ">" "'" "\"" "=" "|" "." "," ";")) + (rule character (alt letter digit symbol "_")) + (rule identifier (seq letter (star (alt letter digit "_")))) + (rule terminal + (alt (seq "'" character (star character) "'") (seq "\"" character (star character) "\""))) + (rule lhs (seq identifier)) + (rule rhs + (alt identifier terminal + (seq "[" rhs "]") + (seq "{" rhs "}") + (seq "(" rhs ")") + (seq rhs "|" rhs) + (seq rhs "," rhs)) ) + (rule rule (seq lhs "=" rhs ";")) + (rule grammar (star rule)))} + ], + "pascal": [ + %q{ + (* a simple program syntax in EBNF − Wikipedia *) + program = 'PROGRAM', white_space, identifier, white_space, + 'BEGIN', white_space, + { assignment, ";", white_space }, + 'END.' 
; + identifier = alphabetic_character, { alphabetic_character | digit } ; + number = [ "-" ], digit, { digit } ; + string = '"' , { all_characters - '"' }, '"' ; + assignment = identifier , ":=" , ( number | identifier | string ) ; + alphabetic_character = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" ; + digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + white_space = ? white_space characters ? ; + all_characters = ? all visible characters ? ; + }, + %q{((rule program + (seq "PROGRAM" white_space identifier white_space "BEGIN" white_space + (star (seq assignment ";" white_space)) "END." )) + (rule identifier (seq alphabetic_character (star (alt alphabetic_character digit)))) + (rule number (seq (opt "-") digit (star digit))) + (rule string (seq "\"" (star all_characters) "\"")) + (rule assignment (seq identifier ":=" (seq (alt number identifier string)))) + (rule alphabetic_character + (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" + "S" "T" "U" "V" "W" "X" "Y" "Z" )) + (rule digit (alt "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule white_space (seq "? white_space characters ?")) + (rule all_characters (seq "? all visible characters ?")))} + ], + "AA": [ + %{AA = "A";}, + %{((terminal AA (seq "A")))} + ], + "BB": [ + %{BB = 3 * AA, "B";}, + %{ ((terminal BB (seq (rept 3 3 AA) "B")))} + ], + "CC": [ + %{CC = 3 * [AA], "C";}, + %{((terminal CC (seq (rept 3 3 (opt AA)) "C")))} + ], + "DD": [ + %{DD = {AA}, "D";}, + %{((terminal DD (seq (star AA) "D")))} + ], + "EE": [ + %{EE = AA, {AA}, "E";}, + %{((terminal EE (seq AA (star AA) "E")))} + ], + "FF": [ + %{FF = 3 * AA, 3 * [AA], "F";}, + %{((terminal FF (seq (rept 3 3 AA) (rept 3 3 (opt AA)) "F")))} + ], + "GG": [ + %{GG = {3 * AA}, "G";}, + %{((terminal GG (seq (star (rept 3 3 AA)) "G")))} + ], + "space": [ + %{space = ? 
US-ASCII character 32 ?;}, + %{((rule space (seq "? US-ASCII character 32 ?")))} # XXX probably not + ], + "something": [ + %{something = foo, ( bar );}, + %{((rule something (seq foo (seq bar))))} + ] + }.each do |title, (input, expect)| + it title do + input << "\n" unless input.end_with?("\n") + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + + context "alternate terminal characters" do + { + "digits /": [ + %{ + digit_excluding_zero = "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9" ; + digit = "0" / digit_excluding_zero ; + }, + %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule digit (alt "0" digit_excluding_zero)))} + ], + "digits !": [ + %{ + digit_excluding_zero = "1" ! "2" ! "3" ! "4" ! "5" ! "6" ! "7" ! "8" ! "9" ; + digit = "0" ! digit_excluding_zero ; + }, + %{((rule digit_excluding_zero (alt "1" "2" "3" "4" "5" "6" "7" "8" "9")) + (rule digit (alt "0" digit_excluding_zero)))} + ], + #"integer (/ /)": [ + # %{integer = "0" | (/ "-" /), natural_number ;}, + # %{((rule integer (alt "0" (seq (opt "-") natural_number))))} + #], + "natural number (: :)": [ + %{natural_number = digit_excluding_zero, (: digit :) ;}, + %{((rule natural_number (seq digit_excluding_zero (star digit))))} + ], + "legal meta_identifier .": [ + 'rulename = "foo" .', + %{((rule rulename (seq "foo")))} + ], + }.each do |title, (input, expect)| + it title do + input << "\n" unless input.end_with?("\n") + expect(parse(input).to_sxp).to produce(expect, logger) + end + end + end + + context "illegal syntax" do + { + "something": "something = foo ( bar );" + }.each do |title, input| + it title do + expect {parse(input)}.to raise_error(SyntaxError) + end + end + end + + it "parses ISO EBNF grammar" do + gram = parse(File.open(File.expand_path("../../etc/iso-ebnf.isoebnf", __FILE__))) + expect(gram).to be_valid + end + + def parse(input, **options) + @debug = [] + EBNF.parse(input, debug: @debug, format: :isoebnf, **options) + end 
+end From b1de2c016d80d0fa97bf7760e1859961f0f71e9a Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 12 Jul 2020 14:08:41 -0700 Subject: [PATCH 43/50] Parse O_RANGE before RANGE as "^" is an R_CHAR. --- etc/ebnf.ebnf | 2 +- etc/ebnf.html | 4 +-- etc/ebnf.ll1.rb | 54 ++++++++++++++++---------------- etc/ebnf.ll1.sxp | 2 +- etc/ebnf.peg.rb | 2 +- etc/ebnf.peg.sxp | 2 +- etc/ebnf.sxp | 2 +- examples/ebnf-ll1-parser/meta.rb | 54 ++++++++++++++++---------------- examples/ebnf-peg-parser/meta.rb | 2 +- lib/ebnf/ebnf/meta.rb | 5 +-- lib/ebnf/terminals.rb | 2 +- spec/rule_spec.rb | 4 +-- 12 files changed, 68 insertions(+), 67 deletions(-) diff --git a/etc/ebnf.ebnf b/etc/ebnf.ebnf index b2e9a34..a46cdd8 100644 --- a/etc/ebnf.ebnf +++ b/etc/ebnf.ebnf @@ -22,8 +22,8 @@ [9] primary ::= HEX | SYMBOL - | RANGE | O_RANGE + | RANGE | STRING1 | STRING2 | '(' expression ')' diff --git a/etc/ebnf.html b/etc/ebnf.html index d1d9104..d1c4316 100644 --- a/etc/ebnf.html +++ b/etc/ebnf.html @@ -65,13 +65,13 @@ [9] | - RANGE + O_RANGE [9] | - O_RANGE + RANGE [9] diff --git a/etc/ebnf.ll1.rb b/etc/ebnf.ll1.rb index f4000fc..9dc00c1 100644 --- a/etc/ebnf.ll1.rb +++ b/etc/ebnf.ll1.rb @@ -176,8 +176,8 @@ module Meta :alt => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -197,8 +197,8 @@ module Meta :_alt_6 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -208,8 +208,8 @@ module Meta :diff => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -224,8 +224,8 @@ module Meta :_diff_4 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -252,8 +252,8 @@ module Meta :expression => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -262,16 +262,16 @@ module Meta :_pass_1 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :postfix => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -284,8 +284,8 @@ module Meta 
:primary => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -294,8 +294,8 @@ module Meta :_primary_2 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -306,16 +306,16 @@ module Meta :_rule_1 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :seq => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -323,16 +323,16 @@ module Meta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :_seq_2 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -340,8 +340,8 @@ module Meta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -349,8 +349,8 @@ module Meta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -411,8 +411,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -425,8 +425,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -439,8 +439,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -453,8 +453,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -467,8 +467,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -509,8 +509,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -524,8 +524,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -539,8 +539,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -555,8 +555,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -571,8 +571,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, 
"@terminals", @@ -587,8 +587,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -603,8 +603,8 @@ module Meta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", diff --git a/etc/ebnf.ll1.sxp b/etc/ebnf.ll1.sxp index fee71b2..1efb787 100644 --- a/etc/ebnf.ll1.sxp +++ b/etc/ebnf.ll1.sxp @@ -88,7 +88,7 @@ (first "(" HEX O_RANGE RANGE STRING1 STRING2 SYMBOL) (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE STRING1 STRING2 SYMBOL _eof "|" ) - (alt HEX SYMBOL RANGE O_RANGE STRING1 STRING2 _primary_1)) + (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (first "(") (follow "(" ")" "-" "@pass" "@terminals" HEX LHS O_RANGE POSTFIX RANGE diff --git a/etc/ebnf.peg.rb b/etc/ebnf.peg.rb index af660ef..7492826 100644 --- a/etc/ebnf.peg.rb +++ b/etc/ebnf.peg.rb @@ -16,7 +16,7 @@ module EBNFMeta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), diff --git a/etc/ebnf.peg.sxp b/etc/ebnf.peg.sxp index 35ae320..b30b54b 100644 --- a/etc/ebnf.peg.sxp +++ b/etc/ebnf.peg.sxp @@ -13,7 +13,7 @@ (rule _diff_2 "7.2" (seq "-" postfix)) (rule postfix "8" (seq primary _postfix_1)) (rule _postfix_1 "8.1" (opt POSTFIX)) - (rule primary "9" (alt HEX SYMBOL 
RANGE O_RANGE STRING1 STRING2 _primary_1)) + (rule primary "9" (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 _primary_1)) (rule _primary_1 "9.1" (seq "(" expression ")")) (rule pass "10" (seq "@pass" expression)) (terminals _terminals (seq)) diff --git a/etc/ebnf.sxp b/etc/ebnf.sxp index 45e03ea..1fded9e 100644 --- a/etc/ebnf.sxp +++ b/etc/ebnf.sxp @@ -8,7 +8,7 @@ (rule diff "7" (seq postfix (opt (seq "-" postfix)))) (rule postfix "8" (seq primary (opt POSTFIX))) (rule primary "9" - (alt HEX SYMBOL RANGE O_RANGE STRING1 STRING2 (seq "(" expression ")"))) + (alt HEX SYMBOL O_RANGE RANGE STRING1 STRING2 (seq "(" expression ")"))) (rule pass "10" (seq "@pass" expression)) (terminals _terminals (seq)) (terminal LHS "11" (seq (opt (seq "[" SYMBOL "]" (plus " "))) SYMBOL (star " ") "::=")) diff --git a/examples/ebnf-ll1-parser/meta.rb b/examples/ebnf-ll1-parser/meta.rb index a0e98c5..2264f33 100644 --- a/examples/ebnf-ll1-parser/meta.rb +++ b/examples/ebnf-ll1-parser/meta.rb @@ -176,8 +176,8 @@ module EBNFParserMeta :alt => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -197,8 +197,8 @@ module EBNFParserMeta :_alt_6 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -208,8 +208,8 @@ module EBNFParserMeta :diff => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -224,8 +224,8 @@ module EBNFParserMeta :_diff_4 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -252,8 +252,8 @@ module EBNFParserMeta :expression => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -262,16 +262,16 @@ module EBNFParserMeta :_pass_1 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :postfix => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -284,8 +284,8 @@ module EBNFParserMeta :primary => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -294,8 +294,8 @@ module EBNFParserMeta 
:_primary_2 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -306,16 +306,16 @@ module EBNFParserMeta :_rule_1 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :seq => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -323,16 +323,16 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], :_seq_2 => [ :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -340,8 +340,8 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -349,8 +349,8 @@ module EBNFParserMeta :_eps, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "("], @@ -411,8 +411,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -425,8 +425,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -439,8 +439,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -453,8 +453,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -467,8 +467,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -509,8 +509,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -524,8 +524,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -539,8 +539,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -555,8 +555,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -571,8 +571,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, 
:STRING2, "@terminals", @@ -587,8 +587,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", @@ -603,8 +603,8 @@ module EBNFParserMeta :_eof, :HEX, :SYMBOL, - :RANGE, :O_RANGE, + :RANGE, :STRING1, :STRING2, "@terminals", diff --git a/examples/ebnf-peg-parser/meta.rb b/examples/ebnf-peg-parser/meta.rb index 2969f09..2f28b89 100644 --- a/examples/ebnf-peg-parser/meta.rb +++ b/examples/ebnf-peg-parser/meta.rb @@ -16,7 +16,7 @@ module EBNFPegMeta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, "(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), diff --git a/lib/ebnf/ebnf/meta.rb b/lib/ebnf/ebnf/meta.rb index 03a002c..7492826 100644 --- a/lib/ebnf/ebnf/meta.rb +++ b/lib/ebnf/ebnf/meta.rb @@ -16,9 +16,10 @@ module EBNFMeta EBNF::Rule.new(:_diff_2, "7.2", [:seq, "-", :postfix]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:postfix, "8", [:seq, :primary, :_postfix_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_postfix_1, "8.1", [:opt, :POSTFIX]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:primary, "9", [:alt, :HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :_primary_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_primary_1, "9.1", [:seq, 
"(", :expression, ")"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:pass, "10", [:seq, "@pass", :expression]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LHS, "11", [:seq, :_LHS_1, :SYMBOL, :_LHS_2, "::="], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_1, "11.1", [:opt, :_LHS_3], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_LHS_3, "11.3", [:seq, "[", :SYMBOL, "]", :_LHS_4], kind: :terminal).extend(EBNF::PEG::Rule), @@ -60,7 +61,7 @@ module EBNFMeta EBNF::Rule.new(:_CHAR_3, "18.3", [:range, "#xE000-#xFFFD"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_CHAR_4, "18.4", [:range, "#x10000-#x10FFFF"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:R_CHAR, "19", [:diff, :CHAR, :_R_CHAR_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_R_CHAR_1, "19.1", [:alt, "]", "-", :HEX], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:POSTFIX, "20", [:range, "?*+"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:PASS, "21", [:alt, :_PASS_1, :_PASS_2, :_PASS_3, :_PASS_4], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_PASS_1, "21.1", [:range, "#x9#xA#xD#x20"], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/lib/ebnf/terminals.rb b/lib/ebnf/terminals.rb index d35c3e4..24e498b 100644 --- a/lib/ebnf/terminals.rb +++ b/lib/ebnf/terminals.rb @@ -8,7 +8,7 @@ module EBNF::Terminals R_CHAR = %r([\u0009\u000A\u000D\u0020-\u002C\u002E-\u005C\u005E-\uD7FF\u{10000}-\u{10FFFF}])u.freeze RANGE = %r(\[(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX})|#{R_CHAR}|#{HEX})+-?\](?!\s+#{SYMBOL_BASE}\s*::=))u.freeze LHS = %r((?:\[#{SYMBOL_BASE}\])?\s*#{SYMBOL_BASE}\s*::=)u.freeze - O_RANGE = %r(\[^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze + O_RANGE = 
%r(\[\^(?:(?:#{R_CHAR}\-#{R_CHAR})|(?:#{HEX}\-#{HEX}|#{R_CHAR}|#{HEX}))+-?\])u.freeze STRING1 = %r("[\u0009\u000A\u000D\u0020\u0021\u0023-\uD7FF\u{10000}-\u{10FFFF}]*")u.freeze STRING2 = %r('[\u0009\u000A\u000D\u0020-\u0026\u0028-\uD7FF\u{10000}-\u{10FFFF}]*')u.freeze POSTFIX = %r([?*+])u.freeze diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index 0bca146..36b2d5e 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -870,7 +870,7 @@ seq: [], diff: [], postfix: [], - primary: [:HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, "("], + primary: [:HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, "("], pass: ["@pass"], LHS: ["["], SYMBOL: ["a-z", "A-Z", "0-9", "_", "."], @@ -901,7 +901,7 @@ seq: [:diff], diff: [:postfix], postfix: [:primary, :POSTFIX], - primary: [:HEX, :SYMBOL, :RANGE, :O_RANGE, :STRING1, :STRING2, :expression], + primary: [:HEX, :SYMBOL, :O_RANGE, :RANGE, :STRING1, :STRING2, :expression], pass: [:expression], LHS: [:SYMBOL], SYMBOL: [], From fff1b7e3a3d701044be2be3a132683307fac75eb Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 12 Jul 2020 14:24:29 -0700 Subject: [PATCH 44/50] Minor ABNF tweaks. 
--- etc/abnf-core.ebnf | 2 +- examples/abnf/Rakefile | 8 +-- examples/abnf/abnf-core.ebnf | 52 -------------- examples/abnf/abnf.abnf | 121 --------------------------------- examples/abnf/abnf.ebnf | 124 ---------------------------------- examples/abnf/abnf.peg.sxp | 8 +-- examples/abnf/abnf.sxp | 7 +- examples/abnf/core.rb | 2 +- examples/abnf/doc/parser.html | 119 ++++++++++++++++++++++++-------- examples/abnf/meta.rb | 6 +- spec/abnf_spec.rb | 8 +++ 11 files changed, 117 insertions(+), 340 deletions(-) delete mode 100644 examples/abnf/abnf-core.ebnf delete mode 100644 examples/abnf/abnf.abnf delete mode 100644 examples/abnf/abnf.ebnf diff --git a/etc/abnf-core.ebnf b/etc/abnf-core.ebnf index 5856444..063cfae 100644 --- a/etc/abnf-core.ebnf +++ b/etc/abnf-core.ebnf @@ -21,7 +21,7 @@ DIGIT ::= [#x30-#x39] DQUOTE ::= #x22 # " (Double Quote) -HEXDIG ::= DIGIT | [A-F] +HEXDIG ::= DIGIT | [A-F] # [0-9A-F] HTAB ::= #x09 # horizontal tab diff --git a/examples/abnf/Rakefile b/examples/abnf/Rakefile index fb20295..0d41f50 100644 --- a/examples/abnf/Rakefile +++ b/examples/abnf/Rakefile @@ -1,6 +1,6 @@ task default: ['abnf.sxp', 'abnf.peg.sxp', "meta.rb", "core.rb", :doc] -file "meta.rb" => "abnf.ebnf" do |t| +file "meta.rb" => "../../etc/abnf.ebnf" do |t| sh %{ ebnf --peg --format rb \ --mod-name ABNFMeta \ @@ -9,7 +9,7 @@ file "meta.rb" => "abnf.ebnf" do |t| } end -file "core.rb" => "abnf-core.ebnf" do |t| +file "core.rb" => "../../etc/abnf-core.ebnf" do |t| sh %{ ebnf --format rb \ --mod-name ABNFCore \ @@ -18,13 +18,13 @@ file "core.rb" => "abnf-core.ebnf" do |t| } end -file 'abnf.sxp' => "abnf.ebnf" do |t| +file 'abnf.sxp' => "../../etc/abnf.ebnf" do |t| sh %{ ebnf --output abnf.sxp #{t.prerequisites.first} } end -file 'abnf.peg.sxp' => "abnf.ebnf" do |t| +file 'abnf.peg.sxp' => "../../etc/abnf.ebnf" do |t| sh %{ ebnf --peg --output abnf.peg.sxp #{t.prerequisites.first} } diff --git a/examples/abnf/abnf-core.ebnf b/examples/abnf/abnf-core.ebnf deleted file mode 100644 
index 5856444..0000000 --- a/examples/abnf/abnf-core.ebnf +++ /dev/null @@ -1,52 +0,0 @@ -# Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z - -BIT ::= '0' | '1' - -CHAR ::= [#x01-#x7F] - # any 7-bit US-ASCII character, - # excluding NUL -CR ::= #x0D - # carriage return - -CRLF ::= CR? LF - # Internet standard newline - -CTL ::= [#x00-#x1F] | #x7F - # controls - -DIGIT ::= [#x30-#x39] - # 0-9 - -DQUOTE ::= #x22 - # " (Double Quote) - -HEXDIG ::= DIGIT | [A-F] - -HTAB ::= #x09 - # horizontal tab - -LF ::= #x0A - # linefeed - -LWSP ::= (WSP | CRLF WSP)* - # Use of this linear-white-space rule - # permits lines containing only white - # space that are no longer legal in - # mail headers and have caused - # interoperability problems in other - # contexts. - # Do not use when defining mail - # headers and use with caution in - # other contexts. - -OCTET ::= [#x00-#xFF] - # 8 bits of data - -SP ::= #x20 - -VCHAR ::= [#x21-#x7E] - # visible (printing) characters - -WSP ::= SP | HTAB - # white space diff --git a/examples/abnf/abnf.abnf b/examples/abnf/abnf.abnf deleted file mode 100644 index 9acd3fb..0000000 --- a/examples/abnf/abnf.abnf +++ /dev/null @@ -1,121 +0,0 @@ -rulelist = 1*( rule / (*c-wsp c-nl) ) - -rule = rulename defined-as elements c-nl - ; continues if next line starts - ; with white space - -rulename = ALPHA *(ALPHA / DIGIT / "-") - -defined-as = *c-wsp ("=" / "=/") *c-wsp - ; basic rules definition and - ; incremental alternatives - -elements = alternation *c-wsp - -c-wsp = WSP / (c-nl WSP) - -c-nl = comment / CRLF - ; comment or newline - -comment = ";" *(WSP / VCHAR) CRLF - -alternation = concatenation - *(*c-wsp "/" *c-wsp concatenation) - -concatenation = repetition *(1*c-wsp repetition) - -repetition = [repeat] element - -repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT - -element = rulename / group / option / - char-val / num-val / prose-val - -group = "(" *c-wsp alternation *c-wsp ")" - -option = "[" *c-wsp alternation 
*c-wsp "]" - -char-val = case-insensitive-string / - case-sensitive-string - -case-insensitive-string = - [ "%i" ] quoted-string - -case-sensitive-string = - "%s" quoted-string - -quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE - ; quoted string of SP and VCHAR - ; without DQUOTE - -num-val = "%" (bin-val / dec-val / hex-val) - -bin-val = "b" 1*BIT - [ 1*("." 1*BIT) / ("-" 1*BIT) ] - ; series of concatenated bit values - ; or single ONEOF range - -dec-val = "d" 1*DIGIT - [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] - -hex-val = "x" 1*HEXDIG - [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] - -prose-val = "<" *(%x20-3D / %x3F-7E) ">" - ; bracketed string of SP and VCHAR - ; without angles - ; prose description, to be used as - ; last resort - -ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - -BIT = "0" / "1" - -CHAR = %x01-7F - ; any 7-bit US-ASCII character, - ; excluding NUL -CR = %x0D - ; carriage return - -CRLF = [CR] LF - ; Internet standard newline - ; Extended to allow only newline - -CTL = %x00-1F / %x7F - ; controls - -DIGIT = %x30-39 - ; 0-9 - -DQUOTE = %x22 - ; " (Double Quote) - -HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - -HTAB = %x09 - ; horizontal tab - -LF = %x0A - ; linefeed - -LWSP = *(WSP / CRLF WSP) - ; Use of this linear-white-space rule - ; permits lines containing only white - ; space that are no longer legal in - ; mail headers and have caused - ; interoperability problems in other - ; contexts. - ; Do not use when defining mail - ; headers and use with caution in - ; other contexts. 
- -OCTET = %x00-FF - ; 8 bits of data - -SP = %x20 - -VCHAR = %x21-7E - ; visible (printing) characters - -WSP = SP / HTAB - ; white space diff --git a/examples/abnf/abnf.ebnf b/examples/abnf/abnf.ebnf deleted file mode 100644 index 6e8d708..0000000 --- a/examples/abnf/abnf.ebnf +++ /dev/null @@ -1,124 +0,0 @@ -rulelist ::= ( rule | (c_wsp* c_nl) )+ - -rule ::= rulename defined_as elements c_nl - # continues if next line starts - # with white space - -elements ::= alternation c_wsp* - -alternation ::= concatenation - (c_wsp* "/" c_wsp* concatenation)* - -concatenation::= repetition (c_wsp+ repetition)* - -repetition ::= repeat? element - -repeat ::= (DIGIT* "*" DIGIT*) | DIGIT+ - -element ::= rulename | group | option | - char_val | num_val | prose_val - -group ::= "(" c_wsp* alternation c_wsp* ")" - -option ::= "[" c_wsp* alternation c_wsp* "]" - -char_val ::= case_insensitive_string | - case_sensitive_string - -case_insensitive_string ::= - "%i"? quoted_string - -case_sensitive_string ::= - "%s" quoted_string - -num_val ::= "%" (bin_val | dec_val | hex_val) - -@terminals - -# Terminals used in ABNF, itself -rulename ::= ALPHA (ALPHA | DIGIT | "-")* - -defined_as ::= c_wsp* ("=" | "=/") c_wsp* - # basic rules definition and - # incremental alternatives - -c_wsp ::= WSP | (c_nl WSP) - -c_nl ::= COMMENT | CRLF - # comment or newline - -comment ::= ";" (WSP | VCHAR)* CRLF - -quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE - # quoted string of SP and VCHAR - # without DQUOTE - -bin_val ::= "b" BIT+ - (("." BIT+)+ | ("-" BIT+))? - # series of concatenated bit values - # or single ONEOF range - -dec_val ::= "d" DIGIT+ - (("." DIGIT+)+ | ("-" DIGIT+))? - -hex_val ::= "x" HEXDIG+ - (("." HEXDIG+)+ | ("-" HEXDIG+))? 
- -prose_val ::= "<" [#x20-#x3D#x3F-#x7E]* ">" - # bracketed string of SP and VCHAR - # without angles - # prose description, to be used as - # last resort - -# Core terminals available in uses of ABNF -ALPHA ::= [#x41-#x5A#x61-#x7A] # A-Z | a-z - -BIT ::= '0' | '1' - -CHAR ::= [#x01-#x7F] - # any 7-bit US-ASCII character, - # excluding NUL -CR ::= #x0D - # carriage return - -CRLF ::= CR? LF - # Internet standard newline - -CTL ::= [#x00-#x1F] | #x7F - # controls - -DIGIT ::= [#x30-#x39] - # 0-9 - -DQUOTE ::= #x22 - # " (Double Quote) - -HEXDIG ::= DIGIT | "A" | "B" | "C" | "D" | "E" | "F" - -HTAB ::= #x09 - # horizontal tab - -LF ::= #x0A - # linefeed - -LWSP ::= (WSP | CRLF WSP)* - # Use of this linear-white-space rule - # permits lines containing only white - # space that are no longer legal in - # mail headers and have caused - # interoperability problems in other - # contexts. - # Do not use when defining mail - # headers and use with caution in - # other contexts. - -OCTET ::= [#x00-#xFF] - # 8 bits of data - -SP ::= #x20 - -VCHAR ::= [#x21-#x7E] - # visible (printing) characters - -WSP ::= SP | HTAB - # white space diff --git a/examples/abnf/abnf.peg.sxp b/examples/abnf/abnf.peg.sxp index 3373889..ee554f3 100644 --- a/examples/abnf/abnf.peg.sxp +++ b/examples/abnf/abnf.peg.sxp @@ -1,5 +1,5 @@ ( - (terminal WSP (alt SP HTAB)) + (rule rulelist (plus _rulelist_1)) (rule _rulelist_1 (alt rule _rulelist_2)) (rule _rulelist_2 (seq _rulelist_3 c_nl)) (rule _rulelist_3 (star c_wsp)) @@ -35,6 +35,7 @@ (rule case_sensitive_string (seq "%s" quoted_string)) (rule num_val (seq "%" _num_val_1)) (rule _num_val_1 (alt bin_val dec_val hex_val)) + (terminals _terminals (seq)) (terminal rulename (seq ALPHA _rulename_1)) (rule _rulename_1 (star _rulename_2)) (rule _rulename_2 (alt ALPHA DIGIT "-")) @@ -92,8 +93,7 @@ (terminal _CTL_2 (hex "#x7F")) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT _HEXDIG_1)) - (terminal _HEXDIG_1 
(range "A-F")) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star _LWSP_1)) @@ -102,4 +102,4 @@ (terminal OCTET (range "#x00-#xFF")) (terminal SP (hex "#x20")) (terminal VCHAR (range "#x21-#x7E")) - (rule rulelist (plus _rulelist_1))) + (terminal WSP (alt SP HTAB))) diff --git a/examples/abnf/abnf.sxp b/examples/abnf/abnf.sxp index 4b96a3e..b7aa3d9 100644 --- a/examples/abnf/abnf.sxp +++ b/examples/abnf/abnf.sxp @@ -1,5 +1,5 @@ ( - (terminal WSP (alt SP HTAB)) + (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl)))) (rule rule (seq rulename defined_as elements c_nl)) (rule elements (seq alternation (star c_wsp))) (rule alternation @@ -14,6 +14,7 @@ (rule case_insensitive_string (seq (opt "%i") quoted_string)) (rule case_sensitive_string (seq "%s" quoted_string)) (rule num_val (seq "%" (alt bin_val dec_val hex_val))) + (terminals _terminals (seq)) (terminal rulename (seq ALPHA (star (alt ALPHA DIGIT "-")))) (terminal defined_as (seq (star c_wsp) (alt "=" "=/") (star c_wsp))) (terminal c_wsp (alt WSP (seq c_nl WSP))) @@ -34,11 +35,11 @@ (terminal CTL (alt (range "#x00-#x1F") (hex "#x7F"))) (terminal DIGIT (range "#x30-#x39")) (terminal DQUOTE (hex "#x22")) - (terminal HEXDIG (alt DIGIT (range "A-F"))) + (terminal HEXDIG (alt DIGIT "A" "B" "C" "D" "E" "F")) (terminal HTAB (hex "#x09")) (terminal LF (hex "#x0A")) (terminal LWSP (star (alt WSP (seq CRLF WSP)))) (terminal OCTET (range "#x00-#xFF")) (terminal SP (hex "#x20")) (terminal VCHAR (range "#x21-#x7E")) - (rule rulelist (plus (alt rule (seq (star c_wsp) c_nl))))) + (terminal WSP (alt SP HTAB))) diff --git a/examples/abnf/core.rb b/examples/abnf/core.rb index d4e73d0..25590ba 100644 --- a/examples/abnf/core.rb +++ b/examples/abnf/core.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from abnf-core.ebnf +# Derived from ../../etc/abnf-core.ebnf module ABNFCore RULES = [ 
EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), diff --git a/examples/abnf/doc/parser.html b/examples/abnf/doc/parser.html index 5c26cae..2aa0d98 100644 --- a/examples/abnf/doc/parser.html +++ b/examples/abnf/doc/parser.html @@ -578,10 +578,10 @@

EBNF Parser for EBNF.

-

Interpret segments in binary creating a string

+

Interpret segments in binary creating a sequence of hex characters or a string

-
      value[1..-1].split('.').map {|b| b.to_i(base=2).chr}.join("")
+        
      hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=2).chr(Encoding::UTF_8)})
     elsif value.include?('-')
@@ -627,10 +627,10 @@

EBNF Parser for EBNF.

-

Interpret segments in decimal creating a string

+

Interpret segments in decimal creating a sequence of hex characters or a string

-
      value[1..-1].split('.').map {|d| d.to_i.chr}.join("")
+        
      hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)})
     elsif value.include?('-')
@@ -667,7 +667,7 @@

EBNF Parser for EBNF.

hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?

-
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/) do |value|
+        
  terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
     if value.include?('.')
@@ -676,10 +676,10 @@

EBNF Parser for EBNF.

-

Interpret segments in hexadecimal creating a string

+

Interpret segments in hexadecimal creating a sequence of hex characters or a string

-
      value[1..-1].split('.').map {|h| h.to_i(base=16).chr}.join("")
+        
      hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=16).chr(Encoding::UTF_8)})
     elsif value.include?('-')
@@ -838,6 +838,7 @@

Non-terminal productions

      raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
       parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
     end
+    progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
     sym
   end
@@ -1000,10 +1001,12 @@

Non-terminal productions

-

char_val ::= case_insensitive_string | case_sensitive_string

+

case_insensitive_string ::= "%i"? quoted_string

-
  production(:char_val) do |value|
+
  production(:case_insensitive_string) do |value|
+    str = value.last[:quoted_string]
+    if str.match?(/[[:alpha:]]/)
@@ -1011,10 +1014,13 @@

Non-terminal productions

-

FIXME: need rule logic for insensitive matching of strings

+

Only need to use case-insensitive if there are alphabetic characters in the string.

-
    value.last[:quoted_string]
+        
      [:istr, value.last[:quoted_string]]
+    else
+      value.last[:quoted_string]
+    end
   end
@@ -1023,6 +1029,19 @@

Non-terminal productions

+

case_sensitive_string ::= "%s" quoted_string

+ + +
  production(:case_sensitive_string) do |value|
+    value.last[:quoted_string]
+  end
+ + + + +
+ +

num_val ::= "%" (bin_val | dec_val | hex_val)

@@ -1050,10 +1069,10 @@

Parser invocation.

  def initialize(input, **options, &block)
- +
- +

If the level option is set, instantiate a logger for collecting trace information.

@@ -1065,10 +1084,10 @@

Parser invocation.

end
- +
- +

Read input, if necessary, which will be used in a Scanner.

@@ -1078,10 +1097,10 @@

Parser invocation.

@parsed_rules = {}
- +
- +

Parses into @parsed_rules

@@ -1094,10 +1113,10 @@

Parser invocation.

end
- +
- +

The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.

@@ -1107,27 +1126,27 @@

Parser invocation.

  def ast
- +
- +

Add built-in rules for standard ABNF rules not otherwise defined in the parsed grammar

    parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
       rule = ABNFCore::RULES.detect {|r| r.sym == sym}
-      parsed_rules[sym] ||= rule
+      parsed_rules[sym] ||= rule if rule
     end
 
     parsed_rules.values
   end
- +
- +

Output formatted S-Expression of grammar

@@ -1136,17 +1155,63 @@

Parser invocation.

require 'sxp' unless defined?(SXP)
- +
- +

Output rules as a formatted S-Expression

-
    SXP::Generator.string(ast.map(&:for_sxp))
   end
+
+private
+ + + + +
+ +
+

Generate a combination of seq and string to represent a sequence of characters

+ +

@param [Array] characters +@return [String,Array]

+ + +
  def hex_or_string(characters)
+    seq = [:seq]
+    str_result = ""
+    characters.each do |c|
+      if VCHAR.match?(c)
+        str_result << c
+      else
+        if str_result.length > 0
+          seq << str_result
+          str_result = ""
+        end
+        seq << [:hex, "#x%x" % c.hex]
+      end
+    end
+    seq << str_result if str_result.length > 0
+ + + + +
+ +
+

Either return the sequence, or a string

+ + + +
    if seq.length == 2 && seq.last.is_a?(String)
+      seq.last
+    else
+      seq
+    end
+  end
 end
diff --git a/examples/abnf/meta.rb b/examples/abnf/meta.rb index b5ce638..d0eb78d 100644 --- a/examples/abnf/meta.rb +++ b/examples/abnf/meta.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from abnf.ebnf +# Derived from ../../etc/abnf.ebnf module ABNFMeta RULES = [ EBNF::Rule.new(:rulelist, nil, [:plus, :_rulelist_1]).extend(EBNF::PEG::Rule), @@ -38,6 +38,7 @@ module ABNFMeta EBNF::Rule.new(:case_sensitive_string, nil, [:seq, "%s", :quoted_string]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:num_val, nil, [:seq, "%", :_num_val_1]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_num_val_1, nil, [:alt, :bin_val, :dec_val, :hex_val]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:rulename, nil, [:seq, :ALPHA, :_rulename_1], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_rulename_1, nil, [:star, :_rulename_2]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_rulename_2, nil, [:alt, :ALPHA, :DIGIT, "-"]).extend(EBNF::PEG::Rule), @@ -95,8 +96,7 @@ module ABNFMeta EBNF::Rule.new(:_CTL_2, nil, [:hex, "#x7F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DIGIT, nil, [:range, "#x30-#x39"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:DQUOTE, nil, [:hex, "#x22"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, :_HEXDIG_1], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_HEXDIG_1, nil, [:range, "A-F"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:HEXDIG, nil, [:alt, :DIGIT, "A", "B", "C", "D", "E", "F"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:HTAB, nil, [:hex, "#x09"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LF, nil, [:hex, "#x0A"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:LWSP, nil, [:star, :_LWSP_1], kind: :terminal).extend(EBNF::PEG::Rule), diff --git a/spec/abnf_spec.rb b/spec/abnf_spec.rb index 
c10f883..91eae38 100644 --- a/spec/abnf_spec.rb +++ b/spec/abnf_spec.rb @@ -32,6 +32,10 @@ "bin = %b1100010.1.1101110", %{((rule bin (seq "b" (hex "#x1") "n")))} ], + "binary range": [ + "bin = %b1100010-1101110", + %{((terminal bin (range "#x62-#x6e")))} + ], "decimal char": [ "dec = %d22", %{((terminal dec (hex "#x16")))} @@ -76,6 +80,10 @@ %(baz = foo / bar), %{((rule baz (alt foo bar)))} ], + "aliteration 2": [ + %(buzz = foo / bar / baz), + %{((rule buzz (alt foo bar baz)))} + ], "incremental alternatives": [ %(ruleset = alt1 / alt2\nruleset =/ alt3\nruleset =/ alt4 / alt5), %{((rule ruleset (alt alt1 alt2 alt3 alt4 alt5)))} From 3ffbca933a79afa70b77bc2a28fd9ddbba8621f7 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 12 Jul 2020 14:25:02 -0700 Subject: [PATCH 45/50] Fix isoebnf term production. --- examples/isoebnf/Rakefile | 6 +- examples/isoebnf/doc/parser.html | 26 ++-- examples/isoebnf/examples/iso-ebnf.isoebnf | 123 ------------------- examples/isoebnf/iso-ebnf.ebnf | 136 --------------------- examples/isoebnf/iso-ebnf.peg.sxp | 35 +++--- examples/isoebnf/iso-ebnf.sxp | 29 +++-- examples/isoebnf/meta.rb | 32 ++--- examples/isoebnf/parser.rb | 16 +-- lib/ebnf/isoebnf.rb | 8 +- lib/ebnf/isoebnf/meta.rb | 27 ++-- spec/isoebnf_spec.rb | 8 +- 11 files changed, 102 insertions(+), 344 deletions(-) delete mode 100644 examples/isoebnf/examples/iso-ebnf.isoebnf delete mode 100644 examples/isoebnf/iso-ebnf.ebnf diff --git a/examples/isoebnf/Rakefile b/examples/isoebnf/Rakefile index 764d441..ee6b779 100644 --- a/examples/isoebnf/Rakefile +++ b/examples/isoebnf/Rakefile @@ -3,7 +3,7 @@ task default: ['iso-ebnf.sxp', 'iso-ebnf.peg.sxp', :meta, :doc] desc 'Build rules table' task meta: "meta.rb" -file "meta.rb" => "iso-ebnf.ebnf" do |t| +file "meta.rb" => "../../etc/iso-ebnf.ebnf" do |t| sh %{ ebnf --peg --format rb \ --mod-name ISOEBNFMeta \ @@ -12,13 +12,13 @@ file "meta.rb" => "iso-ebnf.ebnf" do |t| } end -file 'iso-ebnf.sxp' => "iso-ebnf.ebnf" do |t| +file 
'iso-ebnf.sxp' => "../../etc/iso-ebnf.ebnf" do |t| sh %{ ebnf --output iso-ebnf.sxp #{t.prerequisites.first} } end -file 'iso-ebnf.peg.sxp' => "iso-ebnf.ebnf" do |t| +file 'iso-ebnf.peg.sxp' => "../../etc/iso-ebnf.ebnf" do |t| sh %{ ebnf --peg --output iso-ebnf.peg.sxp #{t.prerequisites.first} } diff --git a/examples/isoebnf/doc/parser.html b/examples/isoebnf/doc/parser.html index acc74f0..1186a99 100644 --- a/examples/isoebnf/doc/parser.html +++ b/examples/isoebnf/doc/parser.html @@ -461,12 +461,12 @@ - +
- +
-

EBNF Parser for EBNF.

+

EBNF Parser for ISO EBNF.

Produces an Abstract Synatx Tree in S-Expression form for the input grammar file

@@ -640,7 +640,7 @@

EBNF Parser for EBNF.

[25] empty ::= ''

-
  terminal(:start_option_symbol, /\[|\(\//)
+
  terminal(:start_option_symbol, /\[|(?:\(\/)/)
@@ -651,7 +651,7 @@

EBNF Parser for EBNF.

[26] definition_separator_symbol ::= '|' | '/' | '!'

-
  terminal(:end_option_symbol, /[\]\/]/)
+
  terminal(:end_option_symbol, /\]/)
@@ -670,7 +670,7 @@

EBNF Parser for EBNF.

-

[28] start_option_symbol ::= '[' | '(/'

+

`[28] startoptionsymbol ::= '['

  terminal(:end_repeat_symbol, /}|:\)/)
@@ -681,7 +681,7 @@

EBNF Parser for EBNF.

-

[29] end_option_symbol ::= ']' | '/)'

+

[29] end_option_symbol ::= ']'

@@ -774,8 +774,8 @@

Non-terminal productions

  start_production(:term, as_hash: true)
   production(:term) do |value|
-    if value[:_diff_1]
-      [:diff, value[:postfix], value[:_term_1]]
+    if value[:_term_1]
+      [:diff, value[:factor], value[:_term_1]]
     else
       value[:factor]
     end
@@ -899,9 +899,11 @@ 

Non-terminal productions

parsing_terminals = false @ast = [] - parse(@input, :syntax, ISOEBNFMeta::RULES, - whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, - **options + parse(@input, + :syntax, + ISOEBNFMeta::RULES, + whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, + **options ) do |context, *data| rule = case context when :rule
diff --git a/examples/isoebnf/examples/iso-ebnf.isoebnf b/examples/isoebnf/examples/iso-ebnf.isoebnf deleted file mode 100644 index 8bcda08..0000000 --- a/examples/isoebnf/examples/iso-ebnf.isoebnf +++ /dev/null @@ -1,123 +0,0 @@ -(* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *) -(* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *) - -syntax = syntax_rule, {syntax_rule} ; - -syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol - (* A defines the sequences of - symbols represented by a *); - -definitions_list = single_definition, {definition_separator_symbol, definitions_list} - (* | separates alternative *); - -single_definition = term, {',', term} - (* , separates successive *); - -term = factor, ['-', exception] - (* A represents any sequence of symbols that is defined by the but - not defined by the *); - -exception = factor - (* A may be used as an - if it could be replaced by a - containingno *); - -factor = [integer, '*'], primary - (* The specifies the number of repetitions of the *); - -primary = optional_sequence - | repeated_sequence - | special_sequence - | grouped_sequence - | meta_identifier - | terminal_string - | empty - ; - -optional_sequence = start_option_symbol, definitions_list, end_option_symbol - (* The brackets [ and ] enclose symbols which are optional *); - -repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol - (* The brackets { and } enclose symbols - which may be repeated any number of times *); - -grouped_sequence = '(', definitions_list, ')' - (* The brackets ( and ) allow any to be a *); - -terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'") - | ('"', second_terminal_character, {second_terminal_character}, '"') - (* A represents the - between the quote symbols '_' or "_" *); - -meta_identifier = letter, {meta_identifier_character} - (* A is the name of a syntactic element of the language being defined *); - -integer = decimal_digit, 
{decimal_digit} ; - -special_sequence = '?', {special_sequence_character}, '?' - (* The meaning of a is not defined in the standard metalanguage. *); - -comment = '(*', {comment_symbol}, '*)' - (* A comment is allowed anywhere outside a - , , - or *); - -comment_symbol = comment | terminal_string | special_sequence | character; - -letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" - | "H" | "I" | "J" | "K" | "L" | "M" | "N" - | "O" | "P" | "Q" | "R" | "S" | "T" | "U" - | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" - | "c" | "d" | "e" | "f" | "g" | "h" | "i" - | "j" | "k" | "l" | "m" | "n" | "o" | "p" - | "q" | "r" | "s" | "t" | "u" | "v" | "w" - | "x" | "y" | "z" - ; - -decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; - -(* Extended to allow '_' *) -meta_identifier_character = letter | decimal_digit | '_' ; - -first_terminal_character = terminal_character - "'" ; - -second_terminal_character = terminal_character - '"' ; - -special_sequence_character = terminal_character - '?' ; - -terminal_character = letter - | decimal_digit - | concatenate_symbol - | defining_symbol - | definition_separator_symbol - | end_comment_symbol - | end_group_symbol - | end_option_symbol - | end_repeat_symbol - | except_symbol - | first_quote_symbol - | repetition_symbol - | second_quote_symbol - | special_sequence_symbol - | start_comment_symbol - | start_group_symbol - | start_option_symbol - | start_repeat_symbol - | terminator_symbol - | other_character - ; - -other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&' - | '#' | '$' | '<' | '>' | '\' | '^' | '`' - | '~' ; - -empty = ; - -(* Simple terminals that are often extended *) -defining_symbol = '=' | ':' ; -definition_separator_symbol = '|' | '/' | '!' ; -terminator_symbol = ';' | '.' 
; -start_option_symbol = '[' | '(/' ; -end_option_symbol = ']' | '/)' ; -start_repeat_symbol = '{' | '(:' ; -end_repeat_symbol = '}' | ':)' ; diff --git a/examples/isoebnf/iso-ebnf.ebnf b/examples/isoebnf/iso-ebnf.ebnf deleted file mode 100644 index 05d6481..0000000 --- a/examples/isoebnf/iso-ebnf.ebnf +++ /dev/null @@ -1,136 +0,0 @@ -# W3C EBNF for ISO/IEC 14977 : 1996 EBNF -# (Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf) - -# Extended to allow no syntax_rule to be valid. -syntax ::= syntax_rule* - -syntax_rule ::= meta_identifier defining_symbol definitions_list terminator_symbol - (* A defines the sequences of - symbols represented by a *) - -definitions_list ::= single_definition (definition_separator_symbol definitions_list)* - (* | separates alternative *) - -single_definition ::= term (',' term)* - (* , separates successive *) - -term ::= factor ('-' exception)? - (* A represents any sequence of symbols that is defined by the but - not defined by the *) - -exception ::= factor - (* A may be used as an - if it could be replaced by a - containingno *) - -factor ::= (integer '*')? primary - (* The specifies the number of repetitions of the *) - -primary ::= optional_sequence - | repeated_sequence - | special_sequence - | grouped_sequence - | meta_identifier - | terminal_string - | empty - -optional_sequence ::= start_option_symbol definitions_list end_option_symbol - (* The brackets [ and ] enclose symbols which are optional *) - -repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol - (* The brackets { and } enclose symbols - which may be repeated any number of times *) - -grouped_sequence ::= '(' definitions_list ')' - (* The brackets ( and ) allow any to be a *) - -# Note, the following are nominally terminal rules, -# although ISO EBNF does not really distinguish between non-terminal and terminal rules. 
- -@terminals - -terminal_string ::= ("'" first_terminal_character+ "'") - | ('"' second_terminal_character+ '"') - (* A represents the - between the quote symbols '_' or "_" *) - -meta_identifier ::= letter meta_identifier_character* - (* A is the name of a syntactic element of the language being defined *) - -integer ::= decimal_digit+ - -special_sequence ::= '?' special_sequence_character* '?' - (* The meaning of a is not defined in the standard metalanguage. *) - -comment ::= start_comment_symbol comment_symbol* end_comment_symbol - (* A comment is allowed anywhere outside a - , , - or *) - -comment_symbol ::= comment | terminal_string | special_sequence | character - -letter ::= [a-zA-Z] -decimal_digit ::= [0-9] - -# Extended to allow '_' -meta_identifier_character ::= letter | decimal_digit | '_' - -first_terminal_character ::= terminal_character - "'" - -second_terminal_character ::= terminal_character - '"' - -special_sequence_character ::= terminal_character - '?' - -terminal_character ::= letter - | decimal_digit - | concatenate_symbol - | defining_symbol - | definition_separator_symbol - | end_comment_symbol - | end_group_symbol - | end_option_symbol - | end_repeat_symbol - | except_symbol - | first_quote_symbol - | repetition_symbol - | second_quote_symbol - | special_sequence_symbol - | start_comment_symbol - | start_group_symbol - | start_option_symbol - | start_repeat_symbol - | terminator_symbol - | other_character - -other_character ::= [:+_%@&$<>^` ̃#x20#x23] | '\' - -gap_separator ::= [#x9#xa#xb#xc#xd#x20] - -@pass gap_separator+ | comment - -empty ::= '' - -# Simple terminals that are often extended -defining_symbol ::= '=' | ':' -definition_separator_symbol ::= '|' | '/' | '!' -terminator_symbol ::= ';' | '.' -start_option_symbol ::= '[' | '(/' -end_option_symbol ::= ']' | '/)' -start_repeat_symbol ::= '{' | '(:' -end_repeat_symbol ::= '}' | ':)' - -# Symbols described, but not actually used. 
- -gap_free_symbol ::= (terminal_character - ['"]) - | terminal_string - -repetition_symbol ::= '*' -except_symbol ::= '-' -concatenate_symbol ::= ',' -first_quote_symbol ::= "'" -second_quote_symbol ::= '"' -start_comment_symbol ::= '(*' -end_comment_symbol ::= '*)' -start_group_symbol ::= '(' -end_group_symbol ::= ')' -special_sequence_symbol ::= '?' diff --git a/examples/isoebnf/iso-ebnf.peg.sxp b/examples/isoebnf/iso-ebnf.peg.sxp index 691c9ba..e82bc11 100644 --- a/examples/isoebnf/iso-ebnf.peg.sxp +++ b/examples/isoebnf/iso-ebnf.peg.sxp @@ -1,5 +1,5 @@ ( - (terminal special_sequence_symbol (seq "?")) + (rule syntax (star syntax_rule)) (rule syntax_rule (seq meta_identifier defining_symbol definitions_list terminator_symbol)) (rule definitions_list (seq single_definition _definitions_list_1)) @@ -23,6 +23,7 @@ (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) + (terminals _terminals (seq)) (terminal terminal_string (alt _terminal_string_1 _terminal_string_2)) (rule _terminal_string_1 (seq "'" _terminal_string_3 "'")) (rule _terminal_string_3 (plus first_terminal_character)) @@ -35,7 +36,9 @@ (rule _special_sequence_1 (star special_sequence_character)) (terminal comment (seq start_comment_symbol _comment_1 end_comment_symbol)) (rule _comment_1 (star comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal comment_symbol (alt comment commentless_symbol other_character)) + (terminal commentless_symbol + (alt terminal_character meta_identifier integer terminal_string special_sequence)) (terminal letter (range "a-zA-Z")) (terminal decimal_digit (range "0-9")) (terminal meta_identifier_character (alt letter decimal_digit "_")) @@ -54,25 +57,25 @@ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) (pass _pass (alt __pass_1 comment)) (rule __pass_1 (plus gap_separator)) - (terminal empty (seq ())) - (terminal 
defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) - (rule _gap_free_symbol_1 (seq _gap_free_symbol_3 terminal_character)) - (rule _gap_free_symbol_3 (not _gap_free_symbol_2)) - (terminal _gap_free_symbol_2 (range "'\"")) + (terminal empty (seq "")) + (terminal concatenate_symbol (seq ",")) (terminal repetition_symbol (seq "*")) (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) (terminal first_quote_symbol (seq "'")) (terminal second_quote_symbol (seq "\"")) (terminal start_comment_symbol (seq "(*")) (terminal end_comment_symbol (seq "*)")) (terminal start_group_symbol (seq "(")) (terminal end_group_symbol (seq ")")) - (rule syntax (star syntax_rule))) + (terminal special_sequence_symbol (seq "?")) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (seq "[")) + (terminal end_option_symbol (seq "]")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt _gap_free_symbol_1 terminal_string)) + (rule _gap_free_symbol_1 (seq _gap_free_symbol_3 terminal_character)) + (rule _gap_free_symbol_3 (not _gap_free_symbol_2)) + (terminal _gap_free_symbol_2 (range "'\""))) diff --git a/examples/isoebnf/iso-ebnf.sxp b/examples/isoebnf/iso-ebnf.sxp index d4abf3a..7f6fe5c 100644 --- a/examples/isoebnf/iso-ebnf.sxp +++ b/examples/isoebnf/iso-ebnf.sxp @@ -1,5 +1,5 @@ ( - (terminal special_sequence_symbol (seq "?")) + (rule syntax (star syntax_rule)) (rule syntax_rule (seq meta_identifier defining_symbol definitions_list 
terminator_symbol)) (rule definitions_list @@ -16,6 +16,7 @@ (rule repeated_sequence (seq start_repeat_symbol definitions_list end_repeat_symbol)) (rule grouped_sequence (seq "(" definitions_list ")")) + (terminals _terminals (seq)) (terminal terminal_string (alt (seq "'" (plus first_terminal_character) "'") @@ -24,7 +25,9 @@ (terminal integer (plus decimal_digit)) (terminal special_sequence (seq "?" (star special_sequence_character) "?")) (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol)) - (terminal comment_symbol (alt comment terminal_string special_sequence character)) + (terminal comment_symbol (alt comment commentless_symbol other_character)) + (terminal commentless_symbol + (alt terminal_character meta_identifier integer terminal_string special_sequence)) (terminal letter (range "a-zA-Z")) (terminal decimal_digit (range "0-9")) (terminal meta_identifier_character (alt letter decimal_digit "_")) @@ -41,22 +44,22 @@ (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) (pass _pass (alt (plus gap_separator) comment)) - (terminal empty (seq ())) - (terminal defining_symbol (alt "=" ":")) - (terminal definition_separator_symbol (alt "|" "/" "!")) - (terminal terminator_symbol (alt ";" ".")) - (terminal start_option_symbol (alt "[" "(/")) - (terminal end_option_symbol (alt "]" "/)")) - (terminal start_repeat_symbol (alt "{" "(:")) - (terminal end_repeat_symbol (alt "}" ":)")) - (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)) + (terminal empty (seq "")) + (terminal concatenate_symbol (seq ",")) (terminal repetition_symbol (seq "*")) (terminal except_symbol (seq "-")) - (terminal concatenate_symbol (seq ",")) (terminal first_quote_symbol (seq "'")) (terminal second_quote_symbol (seq "\"")) (terminal start_comment_symbol (seq "(*")) (terminal end_comment_symbol (seq "*)")) (terminal start_group_symbol (seq "(")) (terminal 
end_group_symbol (seq ")")) - (rule syntax (star syntax_rule))) + (terminal special_sequence_symbol (seq "?")) + (terminal defining_symbol (alt "=" ":")) + (terminal definition_separator_symbol (alt "|" "/" "!")) + (terminal terminator_symbol (alt ";" ".")) + (terminal start_option_symbol (seq "[")) + (terminal end_option_symbol (seq "]")) + (terminal start_repeat_symbol (alt "{" "(:")) + (terminal end_repeat_symbol (alt "}" ":)")) + (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string))) diff --git a/examples/isoebnf/meta.rb b/examples/isoebnf/meta.rb index 3c943c6..85b067a 100644 --- a/examples/isoebnf/meta.rb +++ b/examples/isoebnf/meta.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from iso-ebnf.ebnf +# Derived from ../../etc/iso-ebnf.ebnf module ISOEBNFMeta RULES = [ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule), @@ -21,6 +21,7 @@ module ISOEBNFMeta EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule), EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule), @@ -33,7 +34,8 @@ module ISOEBNFMeta EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, 
:end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :terminal_string, :special_sequence, :character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -46,21 +48,10 @@ module ISOEBNFMeta EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:empty, nil, [:seq, []], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_option_symbol, nil, [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_option_symbol, nil, [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), - 
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -68,6 +59,17 @@ module ISOEBNFMeta EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_option_symbol, nil, 
[:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), ] end diff --git a/examples/isoebnf/parser.rb b/examples/isoebnf/parser.rb index ea713ca..3123876 100644 --- a/examples/isoebnf/parser.rb +++ b/examples/isoebnf/parser.rb @@ -1,4 +1,4 @@ -# # EBNF Parser for EBNF. +# # EBNF Parser for EISO BNF. 
# # Produces an Abstract Synatx Tree in S-Expression form for the input grammar file require 'ebnf' @@ -78,7 +78,7 @@ class ISOEBNFPegParser terminal(:empty, //) # `[26] definition_separator_symbol ::= '|' | '/' | '!'` - terminal(:definition_separator_symbol, /[\|!]|(?:\/(?<=\)))/) + terminal(:definition_separator_symbol, /[\|\/!]/) # `[27] terminator_symbol ::= ';' | '.'` terminal(:terminator_symbol, /[;\.]/) @@ -146,8 +146,8 @@ class ISOEBNFPegParser # `[5] term ::= factor ('-' exception)?` start_production(:term, as_hash: true) production(:term) do |value| - if value[:_diff_1] - [:diff, value[:postfix], value[:_term_1]] + if value[:_term_1] + [:diff, value[:factor], value[:_term_1]] else value[:factor] end @@ -211,9 +211,11 @@ def initialize(input, **options, &block) parsing_terminals = false @ast = [] - parse(@input, :syntax, ISOEBNFMeta::RULES, - whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, - **options + parse(@input, + :syntax, + ISOEBNFMeta::RULES, + whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+}, + **options ) do |context, *data| rule = case context when :rule diff --git a/lib/ebnf/isoebnf.rb b/lib/ebnf/isoebnf.rb index 92987dd..8ad644a 100644 --- a/lib/ebnf/isoebnf.rb +++ b/lib/ebnf/isoebnf.rb @@ -1,8 +1,8 @@ require_relative 'isoebnf/meta' require 'logger' -# ABNF parser -# Parses ABNF into an array of {EBNF::Rule}. +# ISO EBNF parser +# Parses ISO EBNF into an array of {EBNF::Rule}. 
module EBNF class ISOEBNF include EBNF::PEG::Parser @@ -142,8 +142,8 @@ class ISOEBNF # `[5] term ::= factor ('-' exception)?` start_production(:term, as_hash: true) production(:term) do |value| - if value[:_diff_1] - [:diff, value[:postfix], value[:_term_1]] + if value[:_term_1] + [:diff, value[:factor], value[:_term_1]] else value[:factor] end diff --git a/lib/ebnf/isoebnf/meta.rb b/lib/ebnf/isoebnf/meta.rb index 6a88b03..753398f 100644 --- a/lib/ebnf/isoebnf/meta.rb +++ b/lib/ebnf/isoebnf/meta.rb @@ -34,7 +34,8 @@ module ISOEBNFMeta EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :terminal_string, :special_sequence, :character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -48,20 +49,9 @@ module ISOEBNFMeta EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule), EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule), EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", 
":"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_option_symbol, nil, [:alt, "[", "(/"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_option_symbol, nil, [:alt, "]", "/)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule), - EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule), @@ -69,6 +59,17 @@ module ISOEBNFMeta EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: 
:terminal).extend(EBNF::PEG::Rule), EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule), + EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule), ] end diff --git a/spec/isoebnf_spec.rb b/spec/isoebnf_spec.rb index 7f85f60..ddb7fd1 100644 --- a/spec/isoebnf_spec.rb +++ b/spec/isoebnf_spec.rb @@ -120,7 +120,7 @@ (star (seq assignment ";" white_space)) "END." 
)) (rule identifier (seq alphabetic_character (star (alt alphabetic_character digit)))) (rule number (seq (opt "-") digit (star digit))) - (rule string (seq "\"" (star all_characters) "\"")) + (rule string (seq "\"" (star (diff all_characters "\"")) "\"")) (rule assignment (seq identifier ":=" (seq (alt number identifier string)))) (rule alphabetic_character (alt "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" @@ -164,7 +164,11 @@ "something": [ %{something = foo, ( bar );}, %{((rule something (seq foo (seq bar))))} - ] + ], + "diff": [ + %{first_terminal_character = terminal_character - "'" ;}, + %{((rule first_terminal_character (diff terminal_character "'")))}, + ], }.each do |title, (input, expect)| it title do input << "\n" unless input.end_with?("\n") From 0c908ffe09ed15876c26ac0d7a9ef83afb823c34 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 12 Jul 2020 14:43:03 -0700 Subject: [PATCH 46/50] Test writer generates EBNF which parses to expected SXP. --- Rakefile | 14 +++++++------- etc/iso-ebnf.sxp | 2 +- etc/sparql.sxp | 9 +++++---- etc/turtle.sxp | 11 ++++++----- lib/ebnf/abnf/core.rb | 2 +- spec/writer_spec.rb | 3 +++ 6 files changed, 23 insertions(+), 18 deletions(-) diff --git a/Rakefile b/Rakefile index e06ed3a..63b905b 100755 --- a/Rakefile +++ b/Rakefile @@ -64,7 +64,7 @@ file "lib/ebnf/abnf/meta.rb" => "etc/abnf.ebnf" do end file "lib/ebnf/abnf/core.rb" => "etc/abnf-core.ebnf" do - %x(bin/ebnf --peg -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) + %x(bin/ebnf -f rb --mod-name ABNFCore -o lib/ebnf/abnf/core.rb etc/abnf-core.ebnf) end file "lib/ebnf/ebnf/meta.rb" => "etc/ebnf.peg.rb" do @@ -80,7 +80,7 @@ end rule ".sxp" => %w{.ebnf} do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --input-format native #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -89,7 +89,7 @@ end rule ".peg.sxp" => %w{.ebnf} 
do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --input-format native --peg #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --peg #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -97,13 +97,13 @@ end rule ".html" => %w{.ebnf} do |t| puts "build #{t.name}" - %x(bin/ebnf --input-format native --format html -o #{t.name} #{t.source}) + %x(bin/ebnf --format html -o #{t.name} #{t.source}) end file "etc/ebnf.ll1.sxp" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" File.open(t.name, "w") do |f| - IO.popen(%(bin/ebnf --input-format native --ll1 ebnf #{t.source})).each_line do |line| + IO.popen(%(bin/ebnf --ll1 ebnf #{t.source})).each_line do |line| f.puts ' ' + line end end @@ -111,10 +111,10 @@ end file "etc/ebnf.peg.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --input-format native --peg --mod-name EBNFMeta --input-format native -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) + %x(bin/ebnf --peg --mod-name EBNFMeta -f rb -o etc/ebnf.peg.rb etc/ebnf.ebnf) end file "etc/ebnf.ll1.rb" => "etc/ebnf.ebnf" do |t| puts "build #{t.name}" - %x(bin/ebnf --input-format native --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) + %x(bin/ebnf --ll1 ebnf -f rb -o etc/ebnf.ll1.rb etc/ebnf.ebnf) end diff --git a/etc/iso-ebnf.sxp b/etc/iso-ebnf.sxp index 84292e0..099e7c4 100644 --- a/etc/iso-ebnf.sxp +++ b/etc/iso-ebnf.sxp @@ -44,7 +44,7 @@ (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\")) (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20")) (pass _pass (alt (plus gap_separator) comment)) - (terminal empty (seq ())) + (terminal empty (seq "")) (terminal concatenate_symbol (seq ",")) (terminal repetition_symbol (seq "*")) (terminal except_symbol (seq "-")) diff --git a/etc/sparql.sxp b/etc/sparql.sxp index 6621476..9fffee2 100644 --- a/etc/sparql.sxp +++ b/etc/sparql.sxp @@ -283,7 +283,8 @@ (rule PrefixedName "137" (alt PNAME_LN PNAME_NS)) (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON)) (terminals 
_terminals (seq)) - (terminal IRIREF "139" (seq "<" (star (range "^<>\"{}|^`]-[#x00-#x20")) ">")) + (terminal IRIREF "139" + (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">")) (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "142" @@ -311,10 +312,10 @@ (terminal STRING_LITERAL2 "157" (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\"")) (terminal STRING_LITERAL_LONG1 "158" - (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR ))* \"'''\"")))) + (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''")) (terminal STRING_LITERAL_LONG2 "159" - (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR ))* '\"\"\"'")))) - (terminal ECHAR "160" (seq "\\" (range "tbnrf\"'"))) + (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\"")) + (terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'"))) (terminal NIL "161" (seq "(" (star WS) ")")) (terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))) (terminal ANON "163" (seq "[" (star WS) "]")) diff --git a/etc/turtle.sxp b/etc/turtle.sxp index aea60b5..720c758 100644 --- a/etc/turtle.sxp +++ b/etc/turtle.sxp @@ -30,7 +30,8 @@ (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS)) (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON)) (terminals _terminals (seq)) - (terminal IRIREF "18" (seq "<" (star (alt (range "^<>\"{}|^`]-[#x00-#x20") UCHAR)) ">")) + (terminal IRIREF "18" + (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">")) (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":")) (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL)) (terminal BLANK_NODE_LABEL "141s" @@ -53,12 +54,12 @@ (terminal STRING_LITERAL_SINGLE_QUOTE "23" (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'")) (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24" - (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* 
\"'''\"")))) + (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''")) (terminal STRING_LITERAL_LONG_QUOTE "25" - (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'")))) + (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\"")) (terminal UCHAR "26" - (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX))) - (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'"))) + (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX))) + (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'"))) (terminal SPARQL_PREFIX "28t" (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx"))) (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee"))) diff --git a/lib/ebnf/abnf/core.rb b/lib/ebnf/abnf/core.rb index d4e73d0..935343e 100644 --- a/lib/ebnf/abnf/core.rb +++ b/lib/ebnf/abnf/core.rb @@ -1,5 +1,5 @@ # This file is automatically generated by ebnf version 2.0.0 -# Derived from abnf-core.ebnf +# Derived from etc/abnf-core.ebnf module ABNFCore RULES = [ EBNF::Rule.new(:ALPHA, nil, [:range, "#x41-#x5A#x61-#x7A"], kind: :terminal), diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index ed7ddf0..1ed2f9a 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -238,6 +238,9 @@ it "outputs grammar as text" do expect {EBNF.parse(File.read(file)).to_s}.to_not raise_error end + it "parses to equivalent rules" do + expect(EBNF.parse(File.read(file)).to_sxp).to produce(File.read(file.sub('.ebnf', '.sxp'))) + end it "outputs grammar as html" do expect {EBNF.parse(File.read(file)).to_html}.to_not raise_error end From efeb62c458f66c39a350cef957dbab1393147ecc Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Sun, 12 Jul 2020 16:45:23 -0700 Subject: [PATCH 47/50] Better test ABNF wrier. 
--- lib/ebnf/writer.rb | 113 ++++++++++++++++--------- spec/writer_spec.rb | 201 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 258 insertions(+), 56 deletions(-) diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index 364230b..f597896 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -218,13 +218,9 @@ def initialize(rules, out: $stdout, html: false, format: :ebnf, **options) def format_ebnf(expr, sep: nil, embedded: false) return (@options[:html] ? %(#{expr}) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) - if expr.length == 1 - return format_ebnf_char(expr) - elsif expr =~ /\A#x\h+/ - return escape_ebnf_hex(expr[2..-1].hex.chr) - else - return format_ebnf_string(expr, expr.include?('"') ? "'" : '"') - end + return expr.length == 1 ? + format_ebnf_char(expr) : + format_ebnf_string(expr, expr.include?('"') ? "'" : '"') end parts = { alt: (@options[:html] ? "| " : "| "), @@ -306,7 +302,6 @@ def format_ebnf_char(c) def format_ebnf_range(string) lbrac = (@options[:html] ? "[ " : "[") rbrac = (@options[:html] ? "] " : "]") - dash = (@options[:html] ? "- " : "-") buffer = lbrac s = StringScanner.new(string) @@ -316,8 +311,6 @@ def format_ebnf_range(string) buffer << (@options[:html] ? 
%(#{s.matched}) : s.matched) when s.scan(/\A#x\h+/) buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8)) - when s.scan(/\A-/) - buffer << dash else buffer << escape_ebnf_hex(s.getch) end @@ -349,7 +342,7 @@ def escape_ebnf_hex(u) if @options[:html] if u.ord <= 0x20 char = %(#{char}) - elsif u.ord <= 0x7F + elsif u.ord < 0x7F char = %(#{char}) elsif u.ord == 0x7F char = %(#{char}) @@ -374,6 +367,9 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) if expr.is_a?(String) if expr.length == 1 return format_abnf_char(expr) + elsif expr.start_with?('%') + # Already encoded + return expr elsif expr =~ /"/ # Split into segments segments = expr.split('"') @@ -383,9 +379,6 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1] seq.unshift(:seq) return format_abnf(seq, sep: nil, embedded: false) - elsif expr.match?(/[\x00-\x1F\u{7F}-\u{10FFFF}]/) - # Express using %d notation - return format_abnf_range(expr) else return (@options[:html] ? %("#{'%s' if sensitive}#{expr}") : %(#{'%s' if sensitive}"#{expr}")) end @@ -410,7 +403,7 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) res = expr[1..-1].map {|e| format_abnf(e, embedded: true)}.join(this_sep) embedded ? (lparen + res + rparen) : res when :diff - raise "ABNF does not support the diff operator" + raise RangeError, "ABNF does not support the diff operator" when :opt char = parts[expr.first.to_sym] r = format_abnf(expr[1], embedded: true) @@ -422,7 +415,12 @@ def format_abnf(expr, sep: nil, embedded: false, sensitive: true) when :hex escape_abnf_hex(expr.last[2..-1].hex.chr) when :range - format_abnf_range(expr.last) + # Returns an [:alt] or [:not [:alt]] if composed of multiple sequences + # Note: ABNF does not support the `not` operator + res = format_abnf_range(expr.last) + res.is_a?(Array) ? + format_abnf(res, embedded: true) : + res when :seq this_sep = (sep ? 
sep : " ") res = expr[1..-1].map do |e| @@ -457,38 +455,75 @@ def format_abnf_char(c) end # Format a range - # FIXME: O_RANGE + # + # Presumes range has already been validated def format_abnf_range(string) - if string.include?('-') && !string.end_with?('-') - # Might include multiple ranges - # #x01-#x03#x05-#x06 - # a-bc-d - dash = (@options[:html] ? "- " : "-") - # Split into separate range segments - if string.start_with?('#x') - ranges = [] - scanner = StringScanner.new(string) - while !scanner.eos? - ranges << scanner.scan(/#x\h+-#x\h+/) + alt, o_range, o_dash = [:alt], false, false + + raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^') + + if string.end_with?('-') + o_dash = true + string = string[0..-2] + end + + scanner = StringScanner.new(string) + hexes, deces = [], [] + in_range = false + # Build op (alt) from different ranges/enums + while !scanner.eos? + if hex = scanner.scan(Terminals::HEX) + # Append any decimal values + alt << "%d" + deces.join(".") unless deces.empty? + deces = [] + + if in_range + # Add "." sequences for any previous hexes + alt << "%x" + hexes[0..-2].join(".") if hexes.length > 1 + alt << "%x#{hexes.last}-#{hex[2..-1]}" + in_range, hexes = false, [] + else + hexes << hex[2..-1] + end + elsif dec = scanner.scan(Terminals::R_CHAR) + # Append any hexadecimal values + alt << "%x" + hexes.join(".") unless hexes.empty? + hexes = [] + + if in_range + # Add "." sequences for any previous hexes + alt << "%d" + deces[0..-2].join(".") if deces.length > 1 + alt << "%d#{deces.last}-#{dec.codepoints.first}" + in_range, deces = false, [] + else + deces << dec.codepoints.first.to_s end - ranges.map {|range|"%x" + range.gsub('#x', '').sub('-', dash)}.join(" / ") - else - '%d' + string.gsub(/[^-]/) {|c| c.ord} end + + in_range = true if scanner.scan(/\-/) + end + + deces << '45' if o_dash + + # Append hexes and deces as "." sequences (should be only one) + alt << "%d" + deces.join(".") unless deces.empty? 
+ alt << "%x" + hexes.join(".") unless hexes.empty? + + # FIXME: HTML abbreviations? + if alt.length == 2 && !o_range + # Just return the range or enum + alt.last else - if string.start_with?('#x') - "%x" + string.split('#x').join('.') - else - "%d" + string.chars.map(&:ord).join(".") - end + # Return the alt, which will be further formatted + o_range ? [:not, alt] : alt end end def escape_abnf_hex(u) fmt = case u.ord - when 0x0000..0x00ff then "#x%02X" - when 0x0100..0xffff then "#x%04X" - else "#x%08X" + when 0x0000..0x00ff then "%02X" + when 0x0100..0xffff then "%04X" + else "%08X" end char = "%x" + (fmt % u.ord) if @options[:html] diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 1ed2f9a..14d5e60 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -39,23 +39,6 @@ end end - describe "#initialize" do - { - prolog: [ - %{[2] Prolog ::= BaseDecl? PrefixDecl*}, - %{[2] Prolog ::= BaseDecl? PrefixDecl*\n} - ], - }.each do |title, (grammar, plain)| - context title do - subject {EBNF::Base.new(grammar).ast} - - it "generates plain" do - expect {EBNF::Writer.new(subject)}.to write(plain).to(:output) - end - end - end - end - describe ".string" do { prolog: [ @@ -115,6 +98,23 @@ end context "EBNF" do + describe "#initialize" do + { + prolog: [ + %{[2] Prolog ::= BaseDecl? PrefixDecl*}, + %{[2] Prolog ::= BaseDecl? PrefixDecl*\n} + ], + }.each do |title, (grammar, plain)| + context title do + subject {EBNF::Base.new(grammar).ast} + + it "generates plain" do + expect {EBNF::Writer.new(subject)}.to write(plain).to(:output) + end + end + end + end + describe "#format_ebnf" do subject {EBNF::Writer.new([])} @@ -172,6 +172,10 @@ [:rept, 1, 3, :A], "A (A A?)?" ], + "rept 2 *": [ + [:rept, 2, "*", :A], + "A A A*" + ], "rept 1 3 (A B)": [ [:rept, 1, 3, [:seq, :A, :B]], "(A B) ((A B) (A B)?)?" 
@@ -204,6 +208,10 @@ [:seq, '\''], %{"'"} ], + "string \"\€\"": [ + [:seq, '€'], + %{"€"} + ], "n3 path": [ [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], %{pathItem (("!" path) | ("^" path))} @@ -248,4 +256,163 @@ end end end + + context "ABNF" do + describe "#initialize" do + { + prolog: [ + %{rulelist = 1*( rule / (*c-wsp c-nl) )\n}, + %{rulelist = 1*(rule / (*c-wsp c-nl))\n} + ], + }.each do |title, (grammar, plain)| + context title do + subject {EBNF::Base.new(grammar, format: :abnf).ast} + + it "generates plain" do + expect {EBNF::Writer.new(subject, format: :abnf)}.to write(plain).to(:output) + end + end + end + end + + describe "#format_abnf" do + subject {EBNF::Writer.new([])} + + context "legal expressions" do + { + "alt": [ + [:alt, :A, :B], + "A / B" + ], + "enum": [ + [:range, "abc-"], + "%d97.98.99.45" + ], + "hex": [ + [:hex, "#x20"], + "%x20" + ], + "istr": [ + [:istr, "foo"], + %("foo") + ], + "opt": [ + [:opt, :A], + "[A]" + ], + "plus": [ + [:plus, :A], + "1*A" + ], + "range": [ + [:range, "a-z"], + "%d97-122" + ], + "range 2": [ + [:range, "a-zA-Z"], + %{(%d97-122 / %d65-90)} + ], + "rept 0 1": [ + [:rept, 0, 1, :A], + "0*1A" + ], + "rept 0 *": [ + [:rept, 0, '*', :A], + "*A" + ], + "rept 1 1": [ + [:rept, 1, 1, :A], + "1A" + ], + "rept 1 *": [ + [:rept, 1, '*', :A], + "1*A" + ], + "rept 1 2": [ + [:rept, 1, 2, :A], + "1*2A" + ], + "rept 1 3": [ + [:rept, 1, 3, :A], + "1*3A" + ], + "rept 2 *": [ + [:rept, 2, "*", :A], + "2*A" + ], + "rept 1 3 (A B)": [ + [:rept, 1, 3, [:seq, :A, :B]], + "1*3(A B)" + ], + "rept 1 3 (A | B)": [ + [:rept, 1, 3, [:alt, :A, :B]], + "1*3(A / B)" + ], + "star": [ + [:star, :A], + "*A" + ], + "string '\\r'": [ + [:seq, "\r"], + %{%x0D} + ], + "string ' '": [ + [:seq, " "], + %{" "} + ], + "string 'a'": [ + [:seq, "a"], + %{"a"} + ], + "string '\"'": [ + [:seq, '"'], + %{%x22} + ], + "string \"'\"": [ + [:seq, '\''], + %{"'"} + ], + "string \"\€\"": [ + [:seq, '€'], + %{%x20AC} + ], + "n3 path": [ + 
[:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], + %{pathItem (("!" path) / ("^" path))} + ], + }.each do |title, (expr, result)| + it title do + expect(subject.send(:format_abnf, expr)).to eql result + end + end + end + + context "illegal expressions" do + { + "[^abc]": [:range, "^abc"], + "A - B": [:diff, :A, :B], + }.each do |title, expr| + it title do + expect {subject.send(:format_abnf, expr)}.to raise_error RangeError + end + end + end + end + + context "Existing grammars" do + { + "ABNF Grammar" => File.expand_path("../../etc/abnf.abnf", __FILE__), + "HTTP Grammar" => File.expand_path("../../examples/abnf/examples/http.abnf", __FILE__) + }.each do |name, file| + context name do + it "outputs grammar as text" do + expect {EBNF.parse(File.read(file), format: :abnf).to_s(format: :abnf)}.to_not raise_error + end + it "outputs grammar as html" do + expect {EBNF.parse(File.read(file), format: :abnf).to_html(format: :abnf)}.to_not raise_error + end + end + end + end + end end From bf519caa135ba32d3585d0d33c564263f4e823a7 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 13 Jul 2020 10:46:50 -0700 Subject: [PATCH 48/50] Update and test ISO EBNF writer. 
--- .../{pascall.isoebnf => pascal.isoebnf} | 0 lib/ebnf/writer.rb | 91 ++++------ spec/writer_spec.rb | 169 +++++++++++++++++- 3 files changed, 204 insertions(+), 56 deletions(-) rename examples/isoebnf/examples/{pascall.isoebnf => pascal.isoebnf} (100%) diff --git a/examples/isoebnf/examples/pascall.isoebnf b/examples/isoebnf/examples/pascal.isoebnf similarity index 100% rename from examples/isoebnf/examples/pascall.isoebnf rename to examples/isoebnf/examples/pascal.isoebnf diff --git a/lib/ebnf/writer.rb b/lib/ebnf/writer.rb index f597896..e64cf62 100644 --- a/lib/ebnf/writer.rb +++ b/lib/ebnf/writer.rb @@ -458,7 +458,7 @@ def format_abnf_char(c) # # Presumes range has already been validated def format_abnf_range(string) - alt, o_range, o_dash = [:alt], false, false + alt, o_dash = [:alt], false raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^') @@ -510,12 +510,12 @@ def format_abnf_range(string) alt << "%x" + hexes.join(".") unless hexes.empty? # FIXME: HTML abbreviations? - if alt.length == 2 && !o_range + if alt.length == 2 # Just return the range or enum alt.last else # Return the alt, which will be further formatted - o_range ? [:not, alt] : alt + alt end end @@ -590,7 +590,10 @@ def format_isoebnf(expr, sep: nil, embedded: false) when :hex format_isoebnf(expr[1], embedded: true) when :range - format_isoebnf_range(expr.last) + res = format_isoebnf_range(expr.last) + res.is_a?(Array) ? + format_isoebnf(res, embedded: true) : + res when :seq this_sep = "," + (sep ? 
sep : " ") res = expr[1..-1].map do |e| @@ -606,8 +609,6 @@ def format_isoebnf(expr, sep: nil, embedded: false) format_isoebnf([:star, value], sep: sep, embedded: embedded) elsif min == 1 && max == '*' format_isoebnf([:plus, value], sep: sep, embedded: embedded) - elsif min > 0 && min == max - "#{min}*" + format_isoebnf(value, sep: sep, embedded: embedded) else val2 = [:seq] while min > 0 @@ -634,58 +635,44 @@ def format_isoebnf(expr, sep: nil, embedded: false) # Format a range # Range is formatted as a aliteration of characters - # FIXME: O_RANGE def format_isoebnf_range(string) chars = [] + o_dash = false + + raise RangeError, "cannot format #{string.inspect} an ABNF range" if string.start_with?('^') + + if string.end_with?('-') + o_dash = true + string = string[0..-2] + end + scanner = StringScanner.new(string) - if string.include?('-') && !string.end_with?('-') - ranges = [] - # Might include multiple ranges - # #x01-#x03#x05-#x06 - # a-bc-d - # Split into separate range segments - if string.start_with?('#x') - while !scanner.eos? - ranges << scanner.scan(/#x\h+-#x\h+/) - end - ranges.each do |range| - first, last = range.split('-').map {|h| h[2..-1].hex.ord} - while first <= last - c = first.chr(Encoding::UTF_8) - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c - first += 1 - end - end - else - while !scanner.eos? - r = scanner.scan(/.-./) - ranges << r - end - ranges.each do |range| - first, last = range.split('-').map {|c| c.ord} - while first <= last - c = first.chr(Encoding::UTF_8) - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c - first += 1 - end - end + in_range = false + # Build chars from different ranges/enums + while !scanner.eos? 
+ char = if hex = scanner.scan(Terminals::HEX) + hex[2..-1].hex.ord.char(Encoding::UTF_8) + else scanner.scan(Terminals::R_CHAR) end - else - while !scanner.eos? - c = if hex = scanner.scan(/#x\h+/) - hex[2..-1].hex.ord.chr(Encoding::UTF_8) - else - scanner.scan(/./) - end + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration: #{char.inspect} is out of range" unless + char && ISOEBNF::TERMINAL_CHARACTER.match?(char) + + if in_range + # calculate characters from chars.last to this char + raise RangeError, "cannot format #{string.inspect} as an ISO EBNF Aliteration" unless chars.last < char + chars.concat (chars.last..char).to_a[1..-1] + in_range = false + else + chars << char end - raise RangeError, "cannot format #{string.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless - ISOEBNF::TERMINAL_CHARACTER.match?(c) - chars << c + + in_range = true if scanner.scan(/\-/) end + + chars << '-' if o_dash + + # Possibly only a single character (no character?) + chars.length == 1 ? 
chars.last.inspect : chars.unshift(:alt) end ERB_DESC = %q( diff --git a/spec/writer_spec.rb b/spec/writer_spec.rb index 14d5e60..c14a230 100644 --- a/spec/writer_spec.rb +++ b/spec/writer_spec.rb @@ -124,9 +124,9 @@ [:alt, :A, :B], "A | B" ], - "enum": [ - [:range, "abc-"], - "[abc-]" + "diff": [ + [:diff, :A, :B], + "A - B" ], "hex": [ [:hex, "#x20"], @@ -402,7 +402,10 @@ context "Existing grammars" do { "ABNF Grammar" => File.expand_path("../../etc/abnf.abnf", __FILE__), - "HTTP Grammar" => File.expand_path("../../examples/abnf/examples/http.abnf", __FILE__) + "HTTP Grammar" => File.expand_path("../../examples/abnf/examples/http.abnf", __FILE__), + "JSON Grammar" => File.expand_path("../../examples/abnf/examples/json.abnf", __FILE__), + "Postal Address" => File.expand_path("../../examples/abnf/examples/postal-address.abnf", __FILE__), + "URI Grammar" => File.expand_path("../../examples/abnf/examples/uri.abnf", __FILE__), }.each do |name, file| context name do it "outputs grammar as text" do @@ -415,4 +418,162 @@ end end end + + context "ISOEBNF" do + describe "#initialize" do + { + prolog: [ + %{syntax = syntax_rule, {syntax_rule} ;}, + %{syntax = syntax_rule, {syntax_rule} ;\n} + ], + }.each do |title, (grammar, plain)| + context title do + subject {EBNF::Base.new(grammar, format: :isoebnf).ast} + + it "generates plain" do + expect {EBNF::Writer.new(subject, format: :isoebnf)}.to write(plain).to(:output) + end + end + end + end + + describe "#format_isoebnf" do + subject {EBNF::Writer.new([])} + + context "legal expressions" do + { + "alt": [ + [:alt, :A, :B], + "A | B" + ], + "diff": [ + [:diff, :A, :B], + "A - B" + ], + "enum": [ + [:range, "abc-"], + %{("a" | "b" | "c" | "-")} + ], + "hex": [ + [:hex, "#x20"], + %(" ") + ], + "istr": [ + [:istr, "foo"], + %("foo") + ], + "opt": [ + [:opt, :A], + "[A]" + ], + "plus": [ + [:plus, :A], + "A, {A}" + ], + "range": [ + [:range, "a-z"], + %{("a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | 
"m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z")} + ], + "range 2": [ + [:range, "a-zA-Z"], + %{("a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z")}], + "rept 0 1": [ + [:rept, 0, 1, :A], + "[A]" + ], + "rept 0 *": [ + [:rept, 0, '*', :A], + "{A}" + ], + "rept 1 1": [ + [:rept, 1, 1, :A], + "A" + ], + "rept 1 *": [ + [:rept, 1, '*', :A], + "A, {A}" + ], + "rept 1 2": [ + [:rept, 1, 2, :A], + "A, [A]" + ], + "rept 1 3": [ + [:rept, 1, 3, :A], + "A, [(A, [A])]" + ], + "rept 2 *": [ + [:rept, 2, "*", :A], + "A, A, {A}" + ], + "rept 1 3 (A B)": [ + [:rept, 1, 3, [:seq, :A, :B]], + "(A, B), [((A, B), [(A, B)])]" + ], + "rept 1 3 (A | B)": [ + [:rept, 1, 3, [:alt, :A, :B]], + "(A | B), [((A | B), [(A | B)])]" + ], + "star": [ + [:star, :A], + "{A}" + ], + "string ' '": [ + [:seq, " "], + %{" "} + ], + "string 'a'": [ + [:seq, "a"], + %{"a"} + ], + "string '\"'": [ + [:seq, '"'], + %{'"'} + ], + "string \"'\"": [ + [:seq, '\''], + %{"'"} + ], + "n3 path": [ + [:seq, :pathItem, [:alt, [:seq, "!", :path], [:seq, "^", :path]]], + %{pathItem, (("!", path) | ("^", path))} + ], + }.each do |title, (expr, result)| + it title do + expect(subject.send(:format_isoebnf, expr)).to eql result + end + end + end + + context "illegal expressions" do + { + "[^abc]": [:range, "^abc"], + "string '\\r'": [:seq, "\r"], + "string \"\€\"": [:seq, '€'], + }.each do |title, expr| + it title do + expect {subject.send(:format_isoebnf, expr)}.to raise_error RangeError + end + end + end + end + + context "Existing grammars" do + { + "ISO EBNF Grammar" => File.expand_path("../../etc/iso-ebnf.isoebnf", __FILE__), + "Simiple EBNF Grammar" => File.expand_path("../../examples/isoebnf/examples/ebnf.isoebnf", 
__FILE__), + "HTML Grammar" => File.expand_path("../../examples/isoebnf/examples/html.isoebnf", __FILE__), + "Pascal Grammar" => File.expand_path("../../examples/isoebnf/examples/pascal.isoebnf", __FILE__), + "Postal Address" => File.expand_path("../../examples/isoebnf/examples/postal-address.isoebnf", __FILE__), + }.each do |name, file| + context name do + it "outputs grammar as text" do + expect {EBNF.parse(File.read(file), format: :isoebnf).to_s(format: :isoebnf)}.to_not raise_error + end + it "outputs grammar as html" do + expect {EBNF.parse(File.read(file), format: :isoebnf).to_html(format: :isoebnf)}.to_not raise_error + end + end + end + end + end end From 12aa51ae54cfec16cf143e77dace84ca42cb2743 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 13 Jul 2020 11:09:49 -0700 Subject: [PATCH 49/50] Add grammar rule identifier renumbering. --- bin/ebnf | 8 ++++++-- lib/ebnf/base.rb | 10 +++++++++- spec/base_spec.rb | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/bin/ebnf b/bin/ebnf index d762f1a..ece8d9a 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -24,15 +24,16 @@ OPT_ARGS = [ ["--debug", GetoptLong::NO_ARGUMENT, "Turn on debugging output"], ["--bnf", GetoptLong::NO_ARGUMENT, "Transform EBNF to BNF"], ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"], - ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], ["--format", "-f", GetoptLong::REQUIRED_ARGUMENT,"Specify output format one of abnf, abnfh, ebnf, html, isoebnf, isoebnfh, ttl, sxp, or rb"], ["--input-format", GetoptLong::REQUIRED_ARGUMENT,"Specify input format one of abnf, ebnf isoebnf, native, or sxp"], + ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], ["--mod-name", GetoptLong::REQUIRED_ARGUMENT,"Module name used when creating ruby tables"], + ["--namespace", "-n", GetoptLong::REQUIRED_ARGUMENT,"Namespace to use when generating Turtle"], 
["--output", "-o", GetoptLong::REQUIRED_ARGUMENT,"Output to the specified file path"], ["--peg", GetoptLong::NO_ARGUMENT, "Transform EBNF to PEG"], ["--prefix", "-p", GetoptLong::REQUIRED_ARGUMENT,"Prefix to use when generating Turtle"], ["--progress", "-v", GetoptLong::NO_ARGUMENT, "Detail on execution"], - ["--namespace", "-n", GetoptLong::REQUIRED_ARGUMENT,"Namespace to use when generating Turtle"], + ["--renumber", GetoptLong::NO_ARGUMENT, "Renumber parsed reules"], ["--validate", GetoptLong::NO_ARGUMENT, "Validate grammar"], ["--help", "-?", GetoptLong::NO_ARGUMENT, "This message"] ] @@ -76,6 +77,7 @@ opts.each do |opt, arg| when '--output' then out = File.open(arg, "w") when '--peg' then options[:peg] = true when '--prefix' then options[:prefix] = arg + when '--renumber' then options[:renumber] = true when '--namespace' then options[:namespace] = arg when '--progress' then options[:level] = 1 unless options[:level] == 0 when '--validate' then options[:validate] = true @@ -93,6 +95,8 @@ if options[:ll1] ebnf.build_tables end +ebnf.renumber! if options[:renumber] + res = case options[:output_format] when :abnf then ebnf.to_s(format: :abnf) when :abnfh then ebnf.to_html(format: :abnf) diff --git a/lib/ebnf/base.rb b/lib/ebnf/base.rb index 243e1df..90d8f71 100644 --- a/lib/ebnf/base.rb +++ b/lib/ebnf/base.rb @@ -254,6 +254,14 @@ def to_ruby(output = $stdout, grammarFile: nil, mod_name: 'Meta', **options) end end + ## + # Renumber, rule identifiers + def renumber! 
+ ast.each_with_index do |rule, index| + rule.id = (index + 1).to_s + end + end + ## # Write out syntax tree as Turtle # @param [String] prefix for language @@ -275,7 +283,7 @@ def to_ttl(prefix = nil, ns = "http://example.org/") ].compact end.join("\n") + - ast.sort.map(&:to_ttl).join("\n") + ast.map(&:to_ttl).join("\n") end def dup diff --git a/spec/base_spec.rb b/spec/base_spec.rb index b649b20..7510b48 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -68,6 +68,20 @@ end end + describe "#renumber!" do + it "creates identifiers for grammars without identifiers" do + gram = EBNF.parse("a ::= b d ::= e") + gram.renumber! + expect(gram.ast.map(&:id)).to eq %w{1 2} + end + + it "renumbers grammars with identifiers" do + gram = EBNF.parse("[10] a ::= b [20] d ::= e") + gram.renumber! + expect(gram.ast.map(&:id)).to eq %w{1 2} + end + end + describe "#validate!" do let(:simple) {EBNF.parse("a ::= b")} it "notes invalid grammar" do From 2bb3f787b731b7d570183d952e6e55302d5263fa Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Mon, 13 Jul 2020 11:17:44 -0700 Subject: [PATCH 50/50] Version 2.1.0. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 227cea2..7ec1d6d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0 +2.1.0