Skip to content
Browse files

Adds lexer support for braced string literals.

  • Loading branch information...
1 parent 00cb1f8 commit 0065e5a730f7d9744b1ab06d85f11b07a3432eac @inukshuk committed Jun 11, 2011
View
28 features/issues/braced_strings.feature
@@ -3,6 +3,15 @@ Feature: BibTeX Braced Strings
I want to be able to parse BibTeX files containing string definitions using braced expressions
Because applications such as BibDesk produce that format
+ @string
+ Scenario: A simple string assignment
+ When I parse the following file:
+ """
+ @string{ foo = {foo} }
+ """
+ Then my bibliography should contain 1 string
+
+ @string @replacement
Scenario: A BibTeX file with string assignments
When I parse the following file:
"""
@@ -13,13 +22,11 @@ Feature: BibTeX Braced Strings
@string{foo4={foo}}
@string{ foo5 = {"foo" bar} }
@string{ foo6 = {"foo" bar{"}} }
- @string{ foo7 = {"foo" bar\} foo} }
- @string{ foo8 = {"foo" bar\{ foo} }
Compound strings:
- @string{ foo8 = foo1 }
- @string{ foo9 = foo1 # {bar} }
- @string{ foo10 = {foo } # {bar} }
+ @string{ foo7 = foo1 }
+ @string{ foo8 = foo1 # {bar} }
+ @string{ foo9 = {foo } # {bar} }
"""
Then my bibliography should contain 9 strings
@@ -31,12 +38,11 @@ Feature: BibTeX Braced Strings
| foo |
| "foo" bar |
| "foo" bar{"} |
- | "foo" bar\} foo |
- | "foo" bar\{ foo |
- | foo |
+ | foo1 |
| foo1 # "bar" |
| "foo " # "bar" |
- When I replace all strings in my bibliography
- Then the string "foo9" should be "foobar"
- And the string "foo10" should be "foo bar"
+ When I replace and join all strings in my bibliography
+ Then the string "foo7" should be "foo"
+ And the string "foo8" should be "foobar"
+ And the string "foo9" should be "foo bar"
View
6 features/step_definitions/bibtex_steps.rb
@@ -72,17 +72,17 @@
Then /^my bibliography should contain (\d+) (\w+)$/ do |count, type|
- assert_equal count.to_i, @bibliography.q("@#{type.chomp!('s')}").length
+ assert_equal count.to_i, @bibliography.q("@#{type.chomp('s')}").length
end
Then /^my bibliography should contain (\d+) (\w+) published in (\d+)$/ do |count, type, year|
- assert_equal count.to_i, @bibliography.q("@#{type.chomp!('s')}[year=#{year}]").length
+ assert_equal count.to_i, @bibliography.q("@#{type.chomp('s')}[year=#{year}]").length
end
# Step: checks that the bibliography element looked up by +id+ has the given
# +type+. Both captures arrive as strings and are converted to symbols before
# the lookup and the comparison.
#
# The expected value (from the step text) is passed first and the actual value
# (read from the bibliography) second, matching the other steps in this file,
# so a failure message labels the two values correctly.
# NOTE(review): assumes the element's #type compares symmetrically with a
# Symbol — confirm against the element class.
Then /^my bibliography should contain an? (\w+) with id "([^"]*)"$/ do |type, id|
  assert_equal type.to_sym, @bibliography[id.to_sym].type
end
Then /^the string "([^"]*)" should be "([^"]*)"$/ do |key, value|
- assert_equal value, @bibliography.strings[key].to_s
+ assert_equal value, @bibliography.strings[key.to_sym].v.to_s
end
View
1 features/strings.feature
@@ -2,6 +2,7 @@ Feature: BibTeX Strings
As a hacker who works with bibliographies
I want to be able to parse BibTeX files containing string assignments
+ @string
Scenario: A BibTeX file with string assignments
When I parse the following file:
"""
View
4 lib/bibtex.rb
@@ -49,8 +49,8 @@ def self.log; BibTeX::Log end
end
# Load debugger
-# require 'ruby-debug'
-# Debugger.start
+require 'ruby-debug'
+Debugger.start
require 'bibtex/extensions'
require 'bibtex/value'
View
2 lib/bibtex/bibtex.y
@@ -60,6 +60,7 @@ rule
| string_value SHARP string_literal { result << val[2] }
string_literal : NAME { result = val[0].downcase.to_sym }
+ | LBRACE content RBRACE { result = val[1] }
| STRING_LITERAL { result = val[0] }
entry : entry_head assignments RBRACE { result = val[0] << val[1] }
@@ -79,7 +80,6 @@ rule
value : string_value { result = val[0] }
| NUMBER { result = val[0] }
- | LBRACE content RBRACE { result = val[1] }
end
View
49 lib/bibtex/lexer.rb
@@ -45,22 +45,26 @@ class Lexer
#
def initialize(options = {})
@options = DEFAULTS.merge(options)
- @data = nil
end
- # Sets the source for the lexical analysis and resets the internal state.
- def data=(string)
+ def reset
@stack = []
@brace_level = 0
@mode = :meta
@active_object = nil
+ @data = nil
+ end
+
+ # Sets the source for the lexical analysis and resets the internal state.
+ def data=(string)
+ reset
@data = StringScanner.new(string)
-
- # @line_breaks = []
- # @line_breaks << @data.pos until @data.scan_until(/\n|$/).empty?
- # @data.reset
end
+ def symbols
+ @stack.map(&:first)
+ end
+
# Returns the line number at a given position in the source.
def line_number_at(index)
0 # (@line_breaks.find_index { |n| n >= index } || 0) + 1
@@ -72,7 +76,7 @@ def next_token
end
def mode=(mode)
- # Log.debug("Lexer: switching to #{mode} mode...")
+ Log.debug("Lexer: switching to #{mode} mode...")
@active_object = case
when [:comment,:string,:preamble,:entry].include?(mode) then mode
@@ -107,17 +111,17 @@ def push(value)
case
when ([:CONTENT,:STRING_LITERAL].include?(value[0]) && value[0] == @stack.last[0])
@stack.last[1][0] << value[1]
- @stack.last[1][1] = line_number_at(@data.pos)
+ @stack.last[1][1] = @data.pos
when value[0] == :ERROR
@stack.push(value) if @options[:include].include?(:errors)
leave_object
when value[0] == :META_CONTENT
if @options[:include].include?(:meta_content)
- value[1] = [value[1], line_number_at(@data.pos)]
+ value[1] = [value[1], @data.pos]
@stack.push(value)
end
else
- value[1] = [value[1], line_number_at(@data.pos)]
+ value[1] = [value[1], @data.pos]
@stack.push(value)
end
self
@@ -145,6 +149,7 @@ def analyse(data=nil)
end
Log.debug('Lexer: finished lexical analysis.')
+ Log.debug(@stack.inspect)
push [false, '$end']
end
@@ -154,7 +159,7 @@ def parse_bibtex
when @data.scan(/\{/o)
@brace_level += 1
push [:LBRACE,'{']
- if (@brace_level == 1 && active?(:comment)) || (@brace_level == 2 && active?(:entry))
+ if (@brace_level == 1 && active?(:comment)) || (@brace_level > 1 )
self.mode = :content
end
when @data.scan(/\}/o)
@@ -210,7 +215,7 @@ def parse_content
push [:CONTENT,match.chop]
push [:RBRACE,'}']
leave_object
- when @brace_level == 1 && active?(:entry)
+ when @brace_level == 1 && (active?(:entry) || active?(:string))
push [:CONTENT,match.chop]
push [:RBRACE,'}']
self.mode = :bibtex
@@ -249,7 +254,7 @@ def parse_literal
push [:STRING_LITERAL,match.chop]
error_unterminated_string
else
- push [:STRING_LITERAL,self.data.rest]
+ push [:STRING_LITERAL,@data.rest]
@data.terminate
error_unterminated_string
end
@@ -285,26 +290,26 @@ def leave_object
def error_unbalanced_braces
- n = line_number_at(@data.pos)
- Log.warn("Lexer: unbalanced braces on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
+ n = @data.pos
+ Log.warn("Lexer: unbalanced braces at #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
backtrace [:E_UNBALANCED_BRACES, [self.data.matched,n]]
end
def error_unterminated_string
- n = line_number_at(@data.pos)
- Log.warn("Lexer: unterminated string on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
+ n = @data.pos
+ Log.warn("Lexer: unterminated string at #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
backtrace [:E_UNTERMINATED_STRING, [@data.matched,n]]
end
def error_unterminated_content
- n = line_number_at(@data.pos)
- Log.warn("Lexer: unterminated content on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
+ n = @data.pos
+ Log.warn("Lexer: unterminated content at #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
backtrace [:E_UNTERMINATED_CONTENT, [@data.matched,n]]
end
def error_unexpected_token
- n = line_number_at(@data.pos)
- Log.warn("Lexer: unexpected token `#{@data.matched}' on line #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
+ n = @data.pos
+ Log.warn("Lexer: unexpected token `#{@data.matched}' at #{n}; brace level #{@brace_level}; mode #{@mode.inspect}.")
backtrace [:E_UNEXPECTED_TOKEN, [@data.matched,n]]
end
View
11 test/bibtex/test_lexer.rb
@@ -0,0 +1,11 @@
+require 'helper.rb'
+
+module BibTeX
+ class LexerTest < MiniTest::Spec
+
+ should 'correctly scan a string literal' do
+ assert_equal Lexer.new.analyse(%q(@string{ x = "foo" })).symbols, [:AT,:STRING,:LBRACE,:NAME,:EQ,:STRING_LITERAL,:RBRACE,false]
+ end
+
+ end
+end

0 comments on commit 0065e5a

Please sign in to comment.
Something went wrong with that request. Please try again.