From bc3a2a7a96fd4bae5c69943fec3bcb842b8ba222 Mon Sep 17 00:00:00 2001 From: Gregg Kellogg Date: Wed, 23 Nov 2016 12:03:38 -0800 Subject: [PATCH] Properly handle comments within middle or at end of rule --- .gitignore | 1 + bin/ebnf | 6 +++--- lib/ebnf/parser.rb | 20 +++++++++++++++++--- spec/base_spec.rb | 12 ++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 67dc548..7a48f73 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ tmp .yardoc _yardoc /Gemfile.lock +/.byebug_history diff --git a/bin/ebnf b/bin/ebnf index b59982c..c64786d 100755 --- a/bin/ebnf +++ b/bin/ebnf @@ -17,10 +17,10 @@ options = { :namespace => "http://www.w3.org/ns/formats/Turtle#", } -out = STDOUT +input, out = nil, STDOUT OPT_ARGS = [ - ["--dbg", GetoptLong::NO_ARGUMENT, "Turn on debugging output"], + ["--debug", GetoptLong::NO_ARGUMENT, "Turn on debugging output"], ["--bnf", GetoptLong::NO_ARGUMENT, "Transform EBNF to BNF"], ["--evaluate","-e", GetoptLong::REQUIRED_ARGUMENT,"Evaluate argument as an EBNF document"], ["--ll1", GetoptLong::REQUIRED_ARGUMENT,"Generate First/Follow rules, argument is start symbol"], @@ -53,7 +53,7 @@ opts = GetoptLong.new(*OPT_ARGS.map {|o| o[0..-2]}) opts.each do |opt, arg| case opt - when '--dbg' then options[:debug] = true + when '--debug' then options[:debug] = true when '--bnf' then options[:bnf] = true when '--evaluate' then input = arg when '--input-format' then options[:format] = arg.to_sym diff --git a/lib/ebnf/parser.rb b/lib/ebnf/parser.rb index 35ab720..ecb60c8 100644 --- a/lib/ebnf/parser.rb +++ b/lib/ebnf/parser.rb @@ -18,14 +18,20 @@ def eachRule(scanner) #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" } when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m) # Eat comments /* .. */ + cur_lineno += s.count("\n") debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m) # Eat comments (* .. *) + cur_lineno += s.count("\n") debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } when s = scanner.scan(%r((#(?!x)|//).*$)) - # Eat comments + # Eat comments // & # cur_lineno += s.count("\n") debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" } + when s = scanner.scan(/\A["']/) + # Found a quote, scan until end of matching quote + s += scanner.scan_until(/#{scanner.matched}|$/) + r += s when s = scanner.scan(%r(^@terminals)) #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" } yield(r) unless r.empty? @@ -45,8 +51,15 @@ def eachRule(scanner) @lineno = cur_lineno r = s else - # Collect until end of line, or start of comment - s = scanner.scan_until(%r((?:/\*)|$)m) + # Collect until end of line, or start of comment or quote + s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$}) + if scanner.matched.length > 0 + # Back up scan head before ending match + scanner.pos = scanner.pos - scanner.matched.length + + # Remove matched from end of string + s = s[0..-(scanner.matched.length+1)] + end cur_lineno += s.count("\n") #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" } r += s @@ -268,6 +281,7 @@ def primary(s) # ((range "^<>'{}|^`") '-\[#x00-#x20\]') def terminal(s) s = s.strip + #STDERR.puts s.inspect case m = s[0,1] when '"', "'" # STRING1 or STRING2 l, s = s[1..-1].split(m.rstrip, 2) diff --git a/spec/base_spec.rb b/spec/base_spec.rb index 074291d..a46d490 100644 --- a/spec/base_spec.rb +++ b/spec/base_spec.rb @@ -34,6 +34,18 @@ %q{((terminal STRING1 "18" (seq "\"" (star (alt CHAR (range "\t'[]()-"))) "\"")))}, %q{[161s] WS ::= #x20 | #x9 | #xD | #xA} => %q{((terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA"))))}, + %q{[1] shexDoc ::= directive* # leading CODE} => + %q{((rule shexDoc "1" (star directive)))}, + %q{[1] shexDoc ::= directive* /* leading CODE */} => + %q{((rule shexDoc "1" (star directive)))}, + %q{[1] shexDoc ::= directive* (* leading CODE *)} => + %q{((rule shexDoc "1" (star directive)))}, + %q{[1] shexDoc ::= directive* // leading CODE} => + %q{((rule shexDoc "1" (star directive)))}, + %q{[1] shexDoc ::= /* leading CODE */ directive*} => + %q{((rule shexDoc "1" (star directive)))}, + %q{[1] shexDoc (* leading CODE *) ::= directive*} => + %q{((rule shexDoc "1" (star directive)))}, }.each do |input, expected| it "parses #{input.inspect}" do expect(parse(input).to_sxp).to produce(expected, @debug)