diff --git a/Readme.rdoc b/Readme.rdoc
new file mode 100644
index 0000000..f8a2c5d
--- /dev/null
+++ b/Readme.rdoc
@@ -0,0 +1,55 @@
+= Treetop S-Expression Parser
+
+This is a very small & simple demonstration parser built using {Treetop}[http://treetop.rubyforge.org/]. It is meant solely to teach the very basics of implementing a grammar in Treetop, not as an actual parser for S-Expressions (which is why it's not packaged as a gem, and why it has no tests).
+
+A real parser *should* improve in several main areas:
+
+* Test coverage: Unit tests are vital for parsers.
+* Performance: This example is extremely slow compared to simpler parsing methods; a real parser should be performant.
+* Error reporting: Even the best-written parser is a complete pain if it doesn't have intelligent error reporting.
+
+=== The Core Grammar
+
+For easy reference, the core grammar of the parser is reproduced here:
+
+  grammar Sexp
+
+    rule expression
+      space? '(' body ')' space?
+    end
+
+    rule body
+      (expression / identifier / float / integer / string / space )*
+    end
+
+    rule integer
+      ('+' / '-')? [0-9]+
+    end
+
+    rule float
+      ('+' / '-')? [0-9]+ (('.' [0-9]+) / ('e' [0-9]+))
+    end
+
+    rule string
+      '"' ([^"\\] / "\\" . )* '"'
+    end
+
+    rule identifier
+      [a-zA-Z\=\*] [a-zA-Z0-9_\=\*]*
+    end
+
+    rule space
+      [\s]+
+    end
+
+  end
+
+=== More Info
+
+For more info visit the {Treetop website}[http://treetop.rubyforge.org/], and have a read through the source code of this parser (don't worry, there isn't much of it!).
+
+=== Author & Credits
+
+Author:: {Aaron Gough}[mailto:aaron@aarongough.com]
+
+Copyright (c) 2010 {Aaron Gough}[http://thingsaaronmade.com/] ({thingsaaronmade.com}[http://thingsaaronmade.com/]), released under the MIT license
\ No newline at end of file
diff --git a/node_extensions.rb b/node_extensions.rb
new file mode 100644
index 0000000..023e3cd
--- /dev/null
+++ b/node_extensions.rb
@@ -0,0 +1,65 @@
+# Syntax node classes for the Sexp grammar. Every class exposes #to_array,
+# which converts the text matched by one grammar rule into a plain Ruby value.
+module Sexp
+  # Integer literal: the matched text converted with to_i.
+  class IntegerLiteral < Treetop::Runtime::SyntaxNode
+    def to_array
+      text_value.to_i
+    end
+  end
+
+  # Double-quoted string literal.
+  class StringLiteral < Treetop::Runtime::SyntaxNode
+    # Single-character backslash escapes and the text they stand for. A
+    # backslash followed by any other character yields that character as is.
+    ESCAPES = {
+      'n' => "\n", 't' => "\t", 'r' => "\r", 'a' => "\a", 'b' => "\b",
+      'e' => "\e", 'f' => "\f", 'v' => "\v", 's' => ' ', '0' => "\0",
+      "\n" => ''
+    }.freeze
+
+    # Returns the literal's contents without the surrounding quotes and with
+    # backslash escapes resolved.
+    #
+    # The text is decoded by hand instead of being passed to eval: the string
+    # rule accepts '#' and '{', so eval would run any "#{...}" interpolation
+    # found in the parsed input. Numeric escapes (\x41, \u0041, \101) are not
+    # decoded; only the backslash is dropped.
+    def to_array
+      text_value[1...-1].gsub(/\\(.)/m) do
+        escaped = Regexp.last_match(1)
+        ESCAPES.fetch(escaped, escaped)
+      end
+    end
+  end
+
+  # Float literal: the matched text converted with to_f.
+  class FloatLiteral < Treetop::Runtime::SyntaxNode
+    def to_array
+      text_value.to_f
+    end
+  end
+
+  # Identifier: the matched text converted to a Symbol.
+  class Identifier < Treetop::Runtime::SyntaxNode
+    def to_array
+      text_value.to_sym
+    end
+  end
+
+  # Parenthesised expression: delegates to its first child element.
+  class Expression < Treetop::Runtime::SyntaxNode
+    # NOTE(review): assumes Parser.clean_tree has already removed the anonymous
+    # nodes (whitespace, parentheses) so that the first element is the body.
+    def to_array
+      elements[0].to_array
+    end
+  end
+
+  # Expression body: an Array holding the converted value of each element.
+  class Body < Treetop::Runtime::SyntaxNode
+    def to_array
+      elements.map { |element| element.to_array }
+    end
+  end
+end
\ No newline at end of file
diff --git a/parser.rb b/parser.rb
new file mode 100644
index 0000000..3e87db7
--- /dev/null
+++ b/parser.rb
@@ -0,0 +1,46 @@
+require 'treetop'
+
+require File.expand_path(File.join(File.dirname(__FILE__), 'node_extensions.rb'))
+
+# Loads the Sexp grammar and converts S-expression source text into nested
+# Ruby arrays.
+class Parser
+  # Raised when the input does not match the grammar. It derives from
+  # StandardError so that a plain `rescue` clause catches it.
+  class ParseError < StandardError; end
+
+  Treetop.load(File.expand_path(File.join(File.dirname(__FILE__), 'sexp_parser.treetop')))
+  @@parser = SexpParser.new
+
+  # Parses +data+ and returns the result of calling #to_array on the root of
+  # the syntax tree.
+  #
+  # Raises Parser::ParseError when the grammar does not match the input.
+  #
+  # NOTE(review): to_array is defined only on the classes in
+  # node_extensions.rb, so the grammar rules must be tagged with those
+  # classes; confirm that sexp_parser.treetop does so.
+  def self.parse(data)
+    tree = @@parser.parse(data)
+
+    # Report the furthest offset Treetop reached before failing
+    # (failure_index) rather than the parser's current position.
+    raise ParseError, "Parse error at offset: #{@@parser.failure_index}" if tree.nil?
+
+    clean_tree(tree)
+    tree.to_array
+  end
+
+  # Recursively deletes every node that is a bare Treetop::Runtime::SyntaxNode
+  # (not an instance of a subclass), mutating the tree in place.
+  def self.clean_tree(root_node)
+    return if root_node.elements.nil?
+
+    root_node.elements.delete_if { |node| node.instance_of?(Treetop::Runtime::SyntaxNode) }
+    root_node.elements.each { |node| clean_tree(node) }
+  end
+
+  # A bare `private` does not apply to methods defined with `def self.`, so
+  # the helper is hidden explicitly.
+  private_class_method :clean_tree
+end
diff --git a/sexp_parser.treetop b/sexp_parser.treetop
new file mode 100644
index 0000000..df677dc
--- /dev/null
+++ b/sexp_parser.treetop
@@ -0,0 +1,31 @@
+grammar Sexp
+
+  rule expression
+    space? '(' body ')' space?
+  end
+
+  rule body
+    (expression / identifier / float / integer / string / space )*
+  end
+
+  rule integer
+    ('+' / '-')? [0-9]+
+  end
+
+  rule float
+    ('+' / '-')? [0-9]+ (('.' [0-9]+) / ('e' [0-9]+))
+  end
+
+  rule string
+    '"' ([^"\\] / "\\" . )* '"'
+  end
+
+  rule identifier
+    [a-zA-Z\=\*] [a-zA-Z0-9_\=\*]*
+  end
+
+  rule space
+    [\s]+
+  end
+
+end
\ No newline at end of file