From 4e60f7ebb59127e9d2668b969416133fca38b5b3 Mon Sep 17 00:00:00 2001 From: Jarrett Colby Date: Wed, 26 Feb 2014 13:17:02 -0500 Subject: [PATCH] First commit --- .gitignore | 56 ++++++++++++++++++ Rakefile | 24 ++++++++ erb_parser.gemspec | 16 ++++++ lib/erb_parser.rb | 33 +++++++++++ lib/erb_parser/erb_grammar.treetop | 47 +++++++++++++++ lib/erb_parser/erb_tag.rb | 23 ++++++++ lib/erb_parser/nodes.rb | 27 +++++++++ lib/erb_parser/parsed_erb.rb | 29 ++++++++++ lib/erb_parser/treetop_runner.rb | 18 ++++++ lib/erb_parser/xml_transformer.rb | 52 +++++++++++++++++ readme.md | 37 ++++++++++++ test/api_test.rb | 57 ++++++++++++++++++ test/peg_test.rb | 92 ++++++++++++++++++++++++++++++ test/test_helper.rb | 6 ++ test/xml_transformer_test.rb | 39 +++++++++++++ 15 files changed, 556 insertions(+) create mode 100644 .gitignore create mode 100644 Rakefile create mode 100644 erb_parser.gemspec create mode 100644 lib/erb_parser.rb create mode 100644 lib/erb_parser/erb_grammar.treetop create mode 100644 lib/erb_parser/erb_tag.rb create mode 100644 lib/erb_parser/nodes.rb create mode 100644 lib/erb_parser/parsed_erb.rb create mode 100644 lib/erb_parser/treetop_runner.rb create mode 100644 lib/erb_parser/xml_transformer.rb create mode 100644 readme.md create mode 100644 test/api_test.rb create mode 100644 test/peg_test.rb create mode 100644 test/test_helper.rb create mode 100644 test/xml_transformer_test.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c562527 --- /dev/null +++ b/.gitignore @@ -0,0 +1,56 @@ + +#********** osx template********** + +.DS_Store + +# Thumbnails +._* + +# Files that might appear on external disk +.Spotlight-V100 +.Trashes + + +#********** linux template********** + +.* +!.gitignore +*~ + +# KDE +.directory + + +#********** windows template********** + +# Windows image file caches +Thumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + + +#********** ruby template********** + 
+*.gem +*.rbc +.bundle +.config +coverage +InstalledFiles +lib/bundler/man +pkg +rdoc +spec/reports +test/tmp +test/version_tmp +tmp + +# YARD artifacts +.yardoc +_yardoc +doc/ + diff --git a/Rakefile b/Rakefile new file mode 100644 index 0000000..653335a --- /dev/null +++ b/Rakefile @@ -0,0 +1,24 @@ +require 'rake/testtask' + +# To run one test: rake test TEST=just_one_file.rb +Rake::TestTask.new do |t| + t.test_files = FileList['test/*_test.rb'] + t.libs << 'test' +end + +def built_gem_name + Dir.glob('erb_parser-*.*.*.gem').first +end + +task :build do + `rm *.gem` + puts `gem build erb_parser.gemspec` +end + +task :install do + puts `gem install #{built_gem_name}` +end + +task :release do + puts `gem push #{built_gem_name}` +end \ No newline at end of file diff --git a/erb_parser.gemspec b/erb_parser.gemspec new file mode 100644 index 0000000..e62610f --- /dev/null +++ b/erb_parser.gemspec @@ -0,0 +1,16 @@ +Gem::Specification.new do |s| + s.name = 'erb_parser' + s.version = '0.0.0' + s.date = '2014-02-26' + s.summary = 'Parser for ERB templates' + s.description = 'Parses ERB templates into two types of tokens: Plain text and ERB tags. Special support for HTML/XML.' + s.authors = ['Jarrett Colby'] + s.email = 'jarrett@madebyhq.com' + s.files = Dir.glob('lib/**/*') + s.homepage = 'http://madebyhq.com/' + + s.add_runtime_dependency 'treetop' + + s.add_development_dependency 'minitest' + s.add_development_dependency 'turn' +end \ No newline at end of file diff --git a/lib/erb_parser.rb b/lib/erb_parser.rb new file mode 100644 index 0000000..adfc10f --- /dev/null +++ b/lib/erb_parser.rb @@ -0,0 +1,33 @@ +require 'treetop' +require 'erb_parser/nodes' +require 'erb_parser/treetop_runner' +require 'erb_parser/parsed_erb' +require 'erb_parser/erb_tag' +require 'erb_parser/xml_transformer' + +module ErbParser + def self.parse(str) + ParsedErb.new TreetopRunner.run(str) + end + + # Takes a string representing an XML document or fragment. 
Finds every ERB tag in the + # XML and replaces it with the tag <erb>. The contents of the replacement tag will be + # the inner Ruby code, escaped for XML. You can override the tag like so: + # + # ErbParser.transform_xml str, :tag => 'tag-name' + # + # If the ERB tag is of the form +<%=+, the attribute +interpolated="true"+ will be + # added. Else if the ERB tag is of the form +<%#+, the attribute +comment="true"+ will be + # added. You can override this behavior like so: + # + # ErbParser.transform_xml str, :interp_attr => {'attr-name' => 'attr-value'} + # ErbParser.transform_xml str, :interp_attr => false + # + # ErbParser.transform_xml str, :comment_attr => {'attr-name' => 'attr-value'} + # ErbParser.transform_xml str, :comment_attr => false + # + # The returned value is a string representing the transformed XML document or fragment. + def self.transform_xml(str, options = {}) + XmlTransformer.transform(parse(str), options) + end +end \ No newline at end of file diff --git a/lib/erb_parser/erb_grammar.treetop b/lib/erb_parser/erb_grammar.treetop new file mode 100644 index 0000000..8a2aec7 --- /dev/null +++ b/lib/erb_parser/erb_grammar.treetop @@ -0,0 +1,47 @@ +module ErbParser + grammar ErbGrammar + rule document + (erb_tag / text)* + end + + rule text + (!'<%' .)+ + + end + + rule erb_tag + '<%' + number_sign:'#'? equal_sign:'='? + _ruby_code:ruby_code + '%>' + + end + + rule ruby_code + (string_literal / (!'%>' .))* + end + + # Matches the following quote styles: + # "string" + # 'string' + # %q(string (string) string) + # %Q(string (string) string) + # %(string (string) string) + # %q{string {string} string} + # %Q{string {string} string} + # %{string {string} string} + rule string_literal + ('"' ('\"' / !'"' .)* '"') / + ('\'' ('\\\'' / !'\'' .)* '\'') / + ('%' ('q' / 'Q')? 
(curly_brackets / parens)) + end + + rule curly_brackets + '{' (curly_brackets / '\}' / !'}' .)* '}' + end + + rule parens + '(' (parens / '\)' / !')' .)* ')' + end + end +end \ No newline at end of file diff --git a/lib/erb_parser/erb_tag.rb b/lib/erb_parser/erb_tag.rb new file mode 100644 index 0000000..c34eb19 --- /dev/null +++ b/lib/erb_parser/erb_tag.rb @@ -0,0 +1,23 @@ +module ErbParser + class ErbTag + def comment? + @treetop_node.comment? + end + + def initialize(treetop_node) + @treetop_node = treetop_node + end + + def interpolated? + @treetop_node.interpolated? + end + + def ruby_code + @treetop_node.ruby_code + end + + def to_s + @treetop_node.text_value + end + end +end \ No newline at end of file diff --git a/lib/erb_parser/nodes.rb b/lib/erb_parser/nodes.rb new file mode 100644 index 0000000..343c8d7 --- /dev/null +++ b/lib/erb_parser/nodes.rb @@ -0,0 +1,27 @@ +module ErbParser + module ErbGrammar + module Text + def type + :text + end + end + + module ErbTag + def comment? + !number_sign.empty? + end + + def interpolated? + !equal_sign.empty? + end + + def ruby_code + _ruby_code.text_value + end + + def type + :erb_tag + end + end + end +end \ No newline at end of file diff --git a/lib/erb_parser/parsed_erb.rb b/lib/erb_parser/parsed_erb.rb new file mode 100644 index 0000000..3d5c9f2 --- /dev/null +++ b/lib/erb_parser/parsed_erb.rb @@ -0,0 +1,29 @@ +module ErbParser + class ParsedErb + # Accesses the parsed tokens as an array. Each element of the array is either a + # String, representing plain text, or an ErbTag. + def [](index) + @tokens[index] + end + + def initialize(treetop_ast) + @treetop_ast = treetop_ast + @tokens = treetop_ast.elements.map do |elem| + case elem.type + when :text + elem.text_value + when :erb_tag + ErbTag.new elem + else + raise "Unexpected type: #{elem.type}" + end + end + end + + # Returns the array of parsed tokens. + attr_reader :tokens + + # Returns the raw Treetop AST. 
+ attr_reader :treetop_ast + end +end \ No newline at end of file diff --git a/lib/erb_parser/treetop_runner.rb b/lib/erb_parser/treetop_runner.rb new file mode 100644 index 0000000..cb4b2d0 --- /dev/null +++ b/lib/erb_parser/treetop_runner.rb @@ -0,0 +1,18 @@ +Treetop.load File.join(File.dirname(__FILE__), 'erb_grammar') + +module ErbParser + # This module doesn't do much. It just provides some boilerplate code to invoke Treetop. + # The result is whatever Treetop returns. + module TreetopRunner + def self.run(str, options = {}) + treetop = ErbGrammarParser.new + if result = treetop.parse(str, options) + result + else + raise ParseError, treetop.failure_reason + end + end + + class ParseError < RuntimeError; end + end +end \ No newline at end of file diff --git a/lib/erb_parser/xml_transformer.rb b/lib/erb_parser/xml_transformer.rb new file mode 100644 index 0000000..cbc3567 --- /dev/null +++ b/lib/erb_parser/xml_transformer.rb @@ -0,0 +1,52 @@ +require 'cgi' + +module ErbParser + module XmlTransformer + def self.transform(parsed_erb, options) + options = { + :tag => 'erb', + :interp_attr => {'interpolated' => 'true'}, + :comment_attr => {'comment' => 'true'} + }.merge(options) + + parsed_erb.tokens.map do |elem| + case elem + when String + elem + when ErbTag + if elem.interpolated? + if options[:interp_attr].is_a?(Hash) + attrs = options[:interp_attr] + else + attrs = {} + end + elsif elem.comment? + if options[:comment_attr].is_a?(Hash) + attrs = options[:comment_attr] + else + attrs = {} + end + else + attrs = {} + end + content_tag options[:tag], CGI.escape_html(elem.ruby_code), attrs + else + raise "Unexpected element: #{elem.class.name}" + end + end.join + end + + def self.content_tag(name, contents, attrs = {}) + if attrs.empty? 
+ attrs_str = '' + else + attrs_str = ' ' + attrs.map do |key, val| + key = CGI.escape_html(key.to_s) + val = CGI.escape_html(val.to_s) + %Q(#{key}="#{val}") + end.join(' ') + end + '<' + name.to_s + attrs_str + '>' + contents.to_s + '' + end + end +end \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..ec46188 --- /dev/null +++ b/readme.md @@ -0,0 +1,37 @@ +## Can ErbParser handle all valid Ruby code? + +No it cannot. Ruby has a very complex syntax. In a library like this, it would be a fool's +errand to try to handle every weird syntactic construct that could technically be +considered valid Ruby. Instead, this library is designed to handle only the constructs +that would commonly appear inside ERB tags. In other words, the basics of the language. + +Just avoid exotic syntactic constructs, and you should be fine. (You shouldn't do anything +syntactically fancy in an ERB template anyway--it's bad coding style.) In particular, you +must avoid Ruby's weirder string literals, such as the following: + + %q!This is a valid string literal, but you must not use this syntax.! + +Also be wary of tricky escape sequences. If you absolutely must use unusual syntax, and it +breaks ErbParser, consider moving the offending code into a class or module external to +the ERB template. + +Nonetheless, the library *does* account for and allow the following string literal +formats: + + "string" + 'string' + %q(string (string) string) + %Q(string (string) string) + %(string (string) string) + %q{string {string} string} + %Q{string {string} string} + %{string {string} string} + +This parser is *not* hardened against malicious input. But then, you shouldn't be +accepting ERB as untrusted input anyway, because ERB allows arbitrary code execution. + +## What does ErbParser do with invalid ERB or Ruby code? + +If you pass code containing a syntax error, the parsing behavior is undefined. 
You may get +an exception, or you may just get nonsensical results. It depends on the type of the +syntax error. \ No newline at end of file diff --git a/test/api_test.rb b/test/api_test.rb new file mode 100644 index 0000000..e2dc3d2 --- /dev/null +++ b/test/api_test.rb @@ -0,0 +1,57 @@ +require 'test_helper' + +# Test the gem's public API. +class ApiTest < MiniTest::Unit::TestCase + def test_complex_document + str = %Q( +

The time is <%= Time.now.strftime("%m %d %Y") %>.

+ + <% 5.times do |i| %> +

<%= %Q{A string with {nested} brackets and a closing tag %>. } * i %>

+ <% end %> + + <%# puts "This is a comment." %> + ) + result = ErbParser.parse(str) + + assert_equal '

The time is', result[0].strip + + assert_kind_of ErbParser::ErbTag, result[1] + assert_equal '<%= Time.now.strftime("%m %d %Y") %>', result[1].to_s + assert_equal ' Time.now.strftime("%m %d %Y") ', result[1].ruby_code + assert result[1].interpolated? + assert !result[1].comment? + + assert_equal '.

', result[2].strip + + assert_kind_of ErbParser::ErbTag, result[3] + assert_equal '<% 5.times do |i| %>', result[3].to_s + assert_equal ' 5.times do |i| ', result[3].ruby_code + assert !result[3].interpolated? + assert !result[3].comment? + + assert_equal '

', result[4].strip + + assert_kind_of ErbParser::ErbTag, result[5] + assert_equal '<%= %Q{A string with {nested} brackets and a closing tag %>. } * i %>', result[5].to_s + assert_equal ' %Q{A string with {nested} brackets and a closing tag %>. } * i ', result[5].ruby_code + assert result[5].interpolated? + assert !result[5].comment? + + assert_equal '

', result[6].strip + + assert_kind_of ErbParser::ErbTag, result[7] + assert_equal '<% end %>', result[7].to_s + assert_equal ' end ', result[7].ruby_code + assert !result[7].interpolated? + assert !result[7].comment? + + assert_equal '', result[8].strip + + assert_kind_of ErbParser::ErbTag, result[9] + assert_equal '<%# puts "This is a comment." %>', result[9].to_s + assert_equal ' puts "This is a comment." ', result[9].ruby_code + assert !result[9].interpolated? + assert result[9].comment? + end +end \ No newline at end of file diff --git a/test/peg_test.rb b/test/peg_test.rb new file mode 100644 index 0000000..05aaf2a --- /dev/null +++ b/test/peg_test.rb @@ -0,0 +1,92 @@ +require 'test_helper' + +# Test the parsing expression grammar (PEG) directly. +class PegTest < MiniTest::Unit::TestCase + include ErbParser + + STRING_LITERALS = [ + '"foo (bar) \" baz"', + "'foo (bar) \\' baz'", + '%q(string (string) string)', + '%Q(string (string) string)', + '%(string (string) string)', + '%q{string {string} string}', + '%Q{string {string} string}', + '%{string {string} string}' + ] + + def test_string_literals + STRING_LITERALS.each do |literal| + result = TreetopRunner.run(literal, root: :string_literal) + assert_kind_of Treetop::Runtime::SyntaxNode, result + end + end + + def test_basic_tag + result = TreetopRunner.run 'Text 1 <% puts "hello world" %> Text 2' + + assert_equal :text, result.elements[0].type + assert_equal 'Text 1 ', result.elements[0].text_value + + assert_equal :erb_tag, result.elements[1].type + assert_equal '<% puts "hello world" %>', result.elements[1].text_value + assert_equal ' puts "hello world" ', result.elements[1].ruby_code + assert !result.elements[1].interpolated? + assert !result.elements[1].comment? 
+ + assert_equal :text, result.elements[2].type + assert_equal ' Text 2', result.elements[2].text_value + end + + def test_interpolated_tag + result = TreetopRunner.run 'Text 1 <%= "hello world" %> Text 2' + + assert_equal :text, result.elements[0].type + assert_equal 'Text 1 ', result.elements[0].text_value + + assert_equal :erb_tag, result.elements[1].type + assert_equal '<%= "hello world" %>', result.elements[1].text_value + assert_equal ' "hello world" ', result.elements[1].ruby_code + assert result.elements[1].interpolated? + assert !result.elements[1].comment? + + assert_equal :text, result.elements[2].type + assert_equal ' Text 2', result.elements[2].text_value + end + + def test_comment_tag + result = TreetopRunner.run 'Text 1 <%# puts "hello world" %> Text 2' + + assert_equal :text, result.elements[0].type + assert_equal 'Text 1 ', result.elements[0].text_value + + assert_equal :erb_tag, result.elements[1].type + assert_equal '<%# puts "hello world" %>', result.elements[1].text_value + assert_equal ' puts "hello world" ', result.elements[1].ruby_code + assert !result.elements[1].interpolated? + assert result.elements[1].comment? + + assert_equal :text, result.elements[2].type + assert_equal ' Text 2', result.elements[2].text_value + end + + def test_tag_with_string_literal + STRING_LITERALS.each do |literal| + result = TreetopRunner.run("Text 1 <%= literal %> Text 2") + assert_kind_of Treetop::Runtime::SyntaxNode, result + end + end + + def test_tag_with_closing_tag_in_string_literal + result = TreetopRunner.run("Text 1 <%= %Q(Foo (bar) %> baz) %>") + + assert_equal :text, result.elements[0].type + assert_equal 'Text 1 ', result.elements[0].text_value + + assert_equal :erb_tag, result.elements[1].type + assert_equal '<%= %Q(Foo (bar) %> baz) %>', result.elements[1].text_value + assert_equal ' %Q(Foo (bar) %> baz) ', result.elements[1].ruby_code + assert result.elements[1].interpolated? + assert !result.elements[1].comment? 
+ end +end \ No newline at end of file diff --git a/test/test_helper.rb b/test/test_helper.rb new file mode 100644 index 0000000..d219c93 --- /dev/null +++ b/test/test_helper.rb @@ -0,0 +1,6 @@ +require 'minitest/unit' +require 'turn/autorun' + +$:.unshift(File.join(File.expand_path(File.dirname(__FILE__)), '../lib')) + +require 'erb_parser' \ No newline at end of file diff --git a/test/xml_transformer_test.rb b/test/xml_transformer_test.rb new file mode 100644 index 0000000..953cf15 --- /dev/null +++ b/test/xml_transformer_test.rb @@ -0,0 +1,39 @@ +require 'test_helper' + +class XmlTransformerTest < MiniTest::Unit::TestCase + def test_replace_erb_tags_with_xml_elements + result = ErbParser.transform_xml( + '

Foo <%= "bar %>" & 1 %> baz. <% foo %> bar <%# baz %>.

' + ) + assert_equal( + '

Foo "bar %>" & 1 baz. ' + + ' foo bar baz .

', + result + ) + end + + def test_override_tag_attr + result = ErbParser.transform_xml '

Foo <% bar %> baz.', :tag => 'erb-tag' + assert_equal '

Foo bar baz.', result + end + + def test_override_interp_attr_to_false + result = ErbParser.transform_xml '

Foo <%= bar %> baz.', :interp_attr => false + assert_equal '

Foo bar baz.', result + end + + def test_override_interp_attr_to_key_value_pair + result = ErbParser.transform_xml '

Foo <%= bar %> baz.', :interp_attr => {'int' => 'yes'} + assert_equal '

Foo bar baz.', result + end + + def test_override_comment_attr_to_false + result = ErbParser.transform_xml '

Foo <%# bar %> baz.', :comment_attr => false + assert_equal '

Foo bar baz.', result + end + + def test_override_comment_attr_to_key_value_pair + result = ErbParser.transform_xml '

Foo <%# bar %> baz.', :comment_attr => {'comm' => 'yes'} + assert_equal '

Foo bar baz.', result + end +end \ No newline at end of file