
Produced tokens now include line number and column position of the matched text
1 parent 573af0f commit 59ad4bba242cd15ca48418f48df0a5d1094ad23c @borgsmidt committed May 16, 2012
Showing with 51 additions and 25 deletions.
  1. +4 −0 CHANGELOG.md
  2. +27 −6 lib/rlex/lexer.rb
  3. +4 −2 lib/rlex/token.rb
  4. +1 −1 lib/rlex/version.rb
  5. +15 −16 spec/rlex/lexer_spec.rb
CHANGELOG.md
@@ -1,5 +1,9 @@
# Changes
+## Version 0.6.0
+
+Produced tokens now include line number and column position of the matched text
+
## Version 0.5.4
Update gem spec to reflect development dependency on `rspec`
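
As a quick orientation before the diff itself, a minimal sketch of the new behavior. It assumes the lexer class is Rlex::Lexer and is loaded via require "rlex/lexer" (neither shown in this diff); the calls mirror the specs further down, and the input string is illustrative:

    require "rlex/lexer"
    include Rlex

    lexer = Lexer.new
    lexer.ignore /\s+/                 # ignored text still advances line/column tracking
    lexer.rule :word, /\w+/
    lexer.start "first\nsecond line"

    lexer.next_token                   # => Token(:word, "first", line 1, col 0)
    lexer.next_token                   # => Token(:word, "second", line 2, col 0)
    token = lexer.next_token           # => Token(:word, "line", line 2, col 7)
    token.line                         # => 2
    token.col                          # => 7

As in the specs, columns are 0-based and give the position of the first character of the matched text.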
lib/rlex/lexer.rb
@@ -100,10 +100,11 @@ def rule(name, pattern)
#
def keyword(name = nil, kword)
# @todo Validate the keyword name
- name = kword if name == nil
- pattern = Regexp.new(Regexp.escape kword.to_s)
+ kword_str = kword.to_s
+ name = kword.to_sym if name == nil
+ pattern = Regexp.new(Regexp.escape kword_str)
rule name, pattern
- @keywords[kword.to_s] = Token.new name.to_sym, kword.to_s
+ @keywords[kword_str] = Token.new name.to_sym, kword_str
return name.to_sym
end
@@ -117,6 +118,8 @@ def keyword(name = nil, kword)
# @return [String] The specified input
#
def start(input)
+ @line = 1
+ @col = 0
@scanner = StringScanner.new input
return input
end
@@ -134,9 +137,11 @@ def next_token
return next_token if ignore_prefix?
rule = greediest_rule
if rule
- prefix = @scanner.scan(rule.pattern)
+ prefix = fetch_prefix_and_update_pos(rule.pattern)
keyword = @keywords[prefix]
- return keyword ? keyword : Token.new(rule.name, prefix)
+ type = keyword ? keyword.type : rule.name
+ token = keyword ? keyword.value : prefix
+ return Token.new(type, token, @line, @col - token.size)
end
raise "unexpected input <#{@scanner.peek(5)}>"
end
@@ -149,7 +154,7 @@ def next_token
# @private
def ignore_prefix?
@ignored.each do |pattern|
- prefix = @scanner.scan(pattern)
+ prefix = fetch_prefix_and_update_pos(pattern)
return true if prefix
end
return false
@@ -168,5 +173,21 @@ def greediest_rule
end
return r
end
+
+ # @private
+ def fetch_prefix_and_update_pos(pattern)
+ prefix = @scanner.scan(pattern)
+ return nil if not prefix
+ parts = prefix.split("\n", -1) # arg -1 allows empty lines
+ if parts.count == 1
+ # Staying on the same line
+ @col += prefix.length
+ else
+ # On a new line
+ @line += parts.count - 1
+ @col = parts.last.length
+ end
+ return prefix
+ end
end
end
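
The position bookkeeping above hinges on split("\n", -1), which keeps trailing empty strings so a match ending in a newline resets the column to 0. A standalone illustration of the same update rule (plain Ruby, no rlex dependency; the prefixes are made up):

    line, col = 1, 0
    ["foo", " \n\n  ", "bar"].each do |prefix|
      parts = prefix.split("\n", -1)   # -1 keeps trailing empty strings
      if parts.count == 1
        col += prefix.length           # same line: just advance the column
      else
        line += parts.count - 1        # one extra line per newline in the match
        col = parts.last.length        # column restarts after the last newline
      end
    end
    line  # => 3
    col   # => 5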
lib/rlex/token.rb
@@ -6,11 +6,13 @@ module Rlex
# @attr_reader [Symbol] type Type of the token, such as the name of
# the rule used to match it
# @attr_reader [String] value Text matched from the input
+ # @attr_reader [Integer] line Line number of the matched text
+ # @attr_reader [Integer] col Column position of the matched text
#
- Token = Struct.new :type, :value
+ Token = Struct.new :type, :value, :line, :col
# Special token used when the lexer has reached the end of the
# specified input.
#
- EOF_TOKEN = Token.new :eof, ""
+ EOF_TOKEN = Token.new :eof, "", -1, -1
end
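
For reference, what the extended struct looks like in use; the sample values match the specs below, and EOF_TOKEN carries -1/-1 as sentinel positions:

    t = Token.new :word, "tokens", 1, 19
    t.type    # => :word
    t.value   # => "tokens"
    t.line    # => 1
    t.col     # => 19

    EOF_TOKEN.line   # => -1
    EOF_TOKEN.col    # => -1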
lib/rlex/version.rb
@@ -1,4 +1,4 @@
module Rlex
# Project version
- VERSION = "0.5.4"
+ VERSION = "0.6.0"
end
spec/rlex/lexer_spec.rb
@@ -32,21 +32,20 @@
@lexer.ignore /\s+/
@lexer.keyword :special
@lexer.start " \t\nspecialspecial special "
- special = Token.new :special, "special"
- @lexer.next_token.should eq special
- @lexer.next_token.should eq special
- @lexer.next_token.should eq special
+ @lexer.next_token.should eq Token.new(:special, "special", 2, 0)
+ @lexer.next_token.should eq Token.new(:special, "special", 2, 7)
+ @lexer.next_token.should eq Token.new(:special, "special", 2, 17)
@lexer.next_token.should eq EOF_TOKEN
end
it "should return tokens matched by regular rules and finish with EOF_TOKEN" do
@lexer.ignore /\s+/
@lexer.rule :word, /\w+/
@lexer.start "sentence with four tokens"
- @lexer.next_token.should eq Token.new :word, "sentence"
- @lexer.next_token.should eq Token.new :word, "with"
- @lexer.next_token.should eq Token.new :word, "four"
- @lexer.next_token.should eq Token.new :word, "tokens"
+ @lexer.next_token.should eq Token.new(:word, "sentence", 1, 0)
+ @lexer.next_token.should eq Token.new(:word, "with", 1, 9)
+ @lexer.next_token.should eq Token.new(:word, "four", 1, 14)
+ @lexer.next_token.should eq Token.new(:word, "tokens", 1, 19)
@lexer.next_token.should eq EOF_TOKEN
end
@@ -57,11 +56,11 @@
@lexer.keyword :rparen, ")"
@lexer.rule :word, /\w+/
@lexer.start "ifu ( if ) ifu"
- @lexer.next_token.should eq Token.new :word, "ifu"
- @lexer.next_token.should eq Token.new :lparen, "("
- @lexer.next_token.should eq Token.new :if, "if"
- @lexer.next_token.should eq Token.new :rparen, ")"
- @lexer.next_token.should eq Token.new :word, "ifu"
+ @lexer.next_token.should eq Token.new(:word, "ifu", 1, 0)
+ @lexer.next_token.should eq Token.new(:lparen, "(", 1, 4)
+ @lexer.next_token.should eq Token.new(:if, "if", 1, 6)
+ @lexer.next_token.should eq Token.new(:rparen, ")", 1, 9)
+ @lexer.next_token.should eq Token.new(:word, "ifu", 1, 11)
@lexer.next_token.should eq EOF_TOKEN
end
@@ -70,9 +69,9 @@
@lexer.rule :word, /\w+/
@lexer.keyword :keyword
@lexer.start "word keyword keywordmore"
- @lexer.next_token.should eq Token.new :word, "word"
- @lexer.next_token.should eq Token.new :keyword, "keyword"
- @lexer.next_token.should eq Token.new :word, "keywordmore"
+ @lexer.next_token.should eq Token.new(:word, "word", 1, 0)
+ @lexer.next_token.should eq Token.new(:keyword, "keyword", 1, 5)
+ @lexer.next_token.should eq Token.new(:word, "keywordmore", 1, 13)
@lexer.next_token.should eq EOF_TOKEN
end
end
