Skip to content

Commit

Permalink
Merge branch 'master' of git://github.com/jgarber/treetop
Browse files Browse the repository at this point in the history
  • Loading branch information
cjheath committed Sep 10, 2009
2 parents 523f83c + 39a9c11 commit 074aa28
Show file tree
Hide file tree
Showing 9 changed files with 58 additions and 24 deletions.
2 changes: 1 addition & 1 deletion benchmark/seqpar_benchmark.rb
Expand Up @@ -96,7 +96,7 @@ def benchmark
end
File.foreach(File.join(@where, 'after.dat')) do |line|
size, time = line.split(' ')
performance_increases << (before[size].to_f - time.to_f) / before[size].to_f unless time == "0"
performance_increases << (before[size].to_f - time.to_f) / before[size].to_f unless time == "0" || before[size] == "0"
end
puts "Average performance increase: #{performance_increases.mean * 100}%"
end
Expand Down
5 changes: 2 additions & 3 deletions lib/treetop/compiler/node_classes/anything_symbol.rb
Expand Up @@ -4,10 +4,9 @@ class AnythingSymbol < AtomicExpression
def compile(address, builder, parent_expression = nil)
super
builder.if__ "index < input_length" do
builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
assign_result "instantiate_node(#{node_class_name},input, index...next_character)"
assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
extend_result_with_inline_module
builder << "@index = next_character"
builder << "@index += 1"
end
builder.else_ do
builder << 'terminal_parse_failure("any character")'
Expand Down
5 changes: 2 additions & 3 deletions lib/treetop/compiler/node_classes/character_class.rb
Expand Up @@ -5,14 +5,13 @@ def compile(address, builder, parent_expression = nil)
super

builder.if__ "has_terminal?(#{grounded_regexp(text_value)}, true, index)" do
builder << 'next_character = index + input[index..-1].match(/\A(.)/um).end(1)'
if address == 0 || decorated?
assign_result "instantiate_node(#{node_class_name}, input, index...next_character)"
assign_result "instantiate_node(#{node_class_name},input, index...(index + 1))"
extend_result_with_inline_module
else
assign_lazily_instantiated_node
end
builder << "@index = next_character"
builder << "@index += 1"
end
builder.else_ do
# "terminal_parse_failure(#{single_quote(characters)})"
Expand Down
4 changes: 2 additions & 2 deletions lib/treetop/compiler/node_classes/sequence.rb
Expand Up @@ -53,10 +53,10 @@ def initialize(sequence_elements)
def compile(index, builder, rule)
super
builder.module_declaration(module_name) do
elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name] ||= []) << e; h}
elements_by_name = sequence_elements.inject({}){|h,e| (h[e.label_name.to_s] ||= []) << e; h}
sequence_elements.each_with_index do |element, index|
if element.label_name
repetitions = elements_by_name[element.label_name]
repetitions = elements_by_name[element.label_name.to_s]
label_name = element.label_name + (repetitions.size > 1 ? (repetitions.index(element)+1).to_s : "")
builder.method_declaration(label_name) do
builder << "elements[#{index}]"
Expand Down
2 changes: 1 addition & 1 deletion lib/treetop/runtime/compiled_parser.rb
Expand Up @@ -88,7 +88,7 @@ def instantiate_node(node_type,*args)

def has_terminal?(terminal, regex, index)
if regex
rx = @regexps[terminal] ||= Regexp.new(terminal, nil, 'u')
rx = @regexps[terminal] ||= Regexp.new(terminal)
input.index(rx, index) == index
else
input[index, terminal.size] == terminal
Expand Down
9 changes: 1 addition & 8 deletions spec/compiler/anything_symbol_spec.rb
Expand Up @@ -21,15 +21,8 @@ class Foo < Treetop::Runtime::SyntaxNode
it "fails to parse epsilon" do
parse('').should be_nil
end

describe "an anything symbol" do
testing_expression '.'
it "matches an UTF-8 character" do
parse("ø").should_not be_nil
end
end
end

module ModFoo
end

Expand Down
6 changes: 0 additions & 6 deletions spec/compiler/character_class_spec.rb
Expand Up @@ -261,10 +261,4 @@ module ModFoo
end
end

describe "A character containing UTF-8 characters" do
testing_expression "[æøå]"
it "recognizes the UTF-8 characters" do
parse("ø").should_not be_nil
end
end
end
45 changes: 45 additions & 0 deletions spec/compiler/multibyte_chars_spec.rb
@@ -0,0 +1,45 @@
#!ruby19
# encoding: utf-8

require File.expand_path("#{File.dirname(__FILE__)}/../spec_helper")

# Require ActiveSupport for multibyte strings
begin
  require 'active_support'
rescue LoadError
  # A failed `require` raises LoadError, which descends from ScriptError rather
  # than StandardError, so a bare `rescue` would never reach this fallback.
  # Activate the gem explicitly and retry the require.
  gem 'activesupport'
  require 'active_support'
end

# Specs that feed multibyte (UTF-8) input to compiled expressions through the
# parse_multibyte spec helper.
module MultibyteCharsSpec
  describe "an anything symbol" do
    testing_expression '.'

    it "matches an UTF-8 character" do
      parse_multibyte("ø").should_not be_nil
    end
  end

  describe "A character class containing UTF-8 characters" do
    testing_expression "[æøå]"

    it "recognizes the UTF-8 characters" do
      parse_multibyte("ø").should_not be_nil
    end
  end

  describe "a character class repetition containing UTF-8 characters mixed with other expressions" do
    testing_expression '[æøå]+ "a"'

    it "lazily instantiates a node for the character" do
      result = parse_multibyte('æøåa')
      repetition = result.elements[0]

      # The raw @elements ivar is expected to still hold `true` entries while
      # the public #elements accessor must not expose them (presumably `true`
      # is the placeholder for a node that has not been instantiated yet).
      repetition.instance_variable_get("@elements").should include(true)
      repetition.elements.should_not include(true)

      repetition.elements.size.should == 3
      result.elements.size.should == 2
      repetition.text_value.should == "æøå"
      repetition.elements[0].text_value.should == "æ"
      repetition.elements[1].text_value.should == "ø"
      repetition.elements[2].text_value.should == "å"
      # This line previously lacked `.should`, so the comparison was evaluated
      # and discarded and could never fail the example.
      result.elements[1].text_value.should == "a"
    end
  end
end
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Expand Up @@ -64,6 +64,10 @@ def parse(input, options = {})
result
end

# Parses +input+ after wrapping it with String#mb_chars, forwarding +options+
# unchanged to #parse and returning whatever #parse returns.
def parse_multibyte(input, options = {})
  multibyte_input = input.mb_chars
  parse(multibyte_input, options)
end

def compiling_grammar(grammar_under_test)
lambda {
grammar_node = parse_with_metagrammar(grammar_under_test.strip, :grammar)
Expand Down

0 comments on commit 074aa28

Please sign in to comment.