From 4e60f7ebb59127e9d2668b969416133fca38b5b3 Mon Sep 17 00:00:00 2001
From: Jarrett Colby <jarrettcolby@gmail.com>
Date: Wed, 26 Feb 2014 13:17:02 -0500
Subject: [PATCH] First commit

---
 .gitignore                         | 56 ++++++++++++++++++
 Rakefile                           | 24 ++++++++
 erb_parser.gemspec                 | 16 ++++++
 lib/erb_parser.rb                  | 33 +++++++++++
 lib/erb_parser/erb_grammar.treetop | 47 +++++++++++++++
 lib/erb_parser/erb_tag.rb          | 23 ++++++++
 lib/erb_parser/nodes.rb            | 27 +++++++++
 lib/erb_parser/parsed_erb.rb       | 29 ++++++++++
 lib/erb_parser/treetop_runner.rb   | 18 ++++++
 lib/erb_parser/xml_transformer.rb  | 52 +++++++++++++++++
 readme.md                          | 37 ++++++++++++
 test/api_test.rb                   | 57 ++++++++++++++++++
 test/peg_test.rb                   | 92 ++++++++++++++++++++++++++++++
 test/test_helper.rb                |  6 ++
 test/xml_transformer_test.rb       | 39 +++++++++++++
 15 files changed, 556 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Rakefile
 create mode 100644 erb_parser.gemspec
 create mode 100644 lib/erb_parser.rb
 create mode 100644 lib/erb_parser/erb_grammar.treetop
 create mode 100644 lib/erb_parser/erb_tag.rb
 create mode 100644 lib/erb_parser/nodes.rb
 create mode 100644 lib/erb_parser/parsed_erb.rb
 create mode 100644 lib/erb_parser/treetop_runner.rb
 create mode 100644 lib/erb_parser/xml_transformer.rb
 create mode 100644 readme.md
 create mode 100644 test/api_test.rb
 create mode 100644 test/peg_test.rb
 create mode 100644 test/test_helper.rb
 create mode 100644 test/xml_transformer_test.rb

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c562527
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,56 @@
+
+#********** osx template********** 
+
+.DS_Store
+
+# Thumbnails
+._*
+
+# Files that might appear on external disk
+.Spotlight-V100
+.Trashes
+ 
+
+#********** linux template********** 
+
+.*
+!.gitignore
+*~
+
+# KDE
+.directory
+ 
+
+#********** windows template********** 
+
+# Windows image file caches
+Thumbs.db 
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+ 
+
+#********** ruby template********** 
+
+*.gem
+*.rbc
+.bundle
+.config
+coverage
+InstalledFiles
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
+
+# YARD artifacts
+.yardoc
+_yardoc
+doc/
+ 
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..653335a
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,24 @@
+require 'rake/testtask'
+
+# To run one test: rake test TEST=just_one_file.rb
+Rake::TestTask.new do |t|
+  t.test_files = FileList['test/*_test.rb']
+  t.libs << 'test'
+end
+
+def built_gem_name
+  Dir.glob('erb_parser-*.*.*.gem').first
+end
+
+task :build do
+  `rm *.gem`
+  puts `gem build erb_parser.gemspec`
+end
+
+task :install do
+  puts `gem install #{built_gem_name}`
+end
+
+task :release do
+  puts `gem push #{built_gem_name}`
+end
\ No newline at end of file
diff --git a/erb_parser.gemspec b/erb_parser.gemspec
new file mode 100644
index 0000000..e62610f
--- /dev/null
+++ b/erb_parser.gemspec
@@ -0,0 +1,16 @@
+Gem::Specification.new do |s|
+  s.name         = 'erb_parser'
+  s.version      = '0.0.0'
+  s.date         = '2014-02-26'
+  s.summary      = 'Parser for ERB templates'
+  s.description  = 'Parses ERB templates into two types of tokens: Plain text and ERB tags. Special support for HTML/XML.'
+  s.authors      = ['Jarrett Colby']
+  s.email        = 'jarrett@madebyhq.com'
+  s.files        = Dir.glob('lib/**/*')
+  s.homepage     = 'http://madebyhq.com/'
+  
+  s.add_runtime_dependency 'treetop'
+  
+  s.add_development_dependency 'minitest'
+  s.add_development_dependency 'turn'
+end
\ No newline at end of file
diff --git a/lib/erb_parser.rb b/lib/erb_parser.rb
new file mode 100644
index 0000000..adfc10f
--- /dev/null
+++ b/lib/erb_parser.rb
@@ -0,0 +1,33 @@
+require 'treetop'
+require 'erb_parser/nodes'
+require 'erb_parser/treetop_runner'
+require 'erb_parser/parsed_erb'
+require 'erb_parser/erb_tag'
+require 'erb_parser/xml_transformer'
+
+module ErbParser
+  def self.parse(str)
+    ParsedErb.new TreetopRunner.run(str)
+  end
+  
+  # Takes a string representing an XML document or fragment. Finds every ERB tag in the
+  # XML and replaces it with the tag <erb>. The contents of the replacement tag will be
+  # the inner Ruby code, escaped for XML. You can override the tag like so:
+  # 
+  #    ErbParser.transform_xml str, :tag => 'tag-name'
+  # 
+  # If the ERB tag is of the form +<%=+, the attribute +interpolated="true"+ will be
+  # added. Else if the ERB tag is of the form +<%#+, the attribute +comment="true"+ will be
+  # added. You can override this behavior like so:
+  #
+  #    ErbParser.transform_xml str, :interp_attr => {'attr-name' => 'attr-value'}
+  #    ErbParser.transform_xml str, :interp_attr => false
+  #    
+  #    ErbParser.transform_xml str, :comment_attr => {'attr-name' => 'attr-value'}
+  #    ErbParser.transform_xml str, :comment_attr => false
+  # 
+  # The returned value is a string representing the transformed XML document or fragment.
+  def self.transform_xml(str, options = {})
+    XmlTransformer.transform(parse(str), options)   
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/erb_grammar.treetop b/lib/erb_parser/erb_grammar.treetop
new file mode 100644
index 0000000..8a2aec7
--- /dev/null
+++ b/lib/erb_parser/erb_grammar.treetop
@@ -0,0 +1,47 @@
+module ErbParser
+  grammar ErbGrammar
+    rule document
+      (erb_tag / text)*
+    end
+    
+    rule text
+      (!'<%' .)+
+      <Text>
+    end
+    
+    rule erb_tag
+      '<%'
+      number_sign:'#'? equal_sign:'='?
+      _ruby_code:ruby_code
+      '%>'
+      <ErbTag>
+    end
+    
+    rule ruby_code
+      (string_literal / (!'%>' .))*
+    end
+
+    # Matches the following quote styles (in the quoted forms, a backslash escapes the next character):
+    # "string"
+    # 'string'
+    # %q(string (string) string)
+    # %Q(string (string) string)
+    # %(string (string) string)
+    # %q{string {string} string}
+    # %Q{string {string} string}
+    # %{string {string} string}
+    rule string_literal
+      ('"' (('\\' .) / !'"' .)* '"') /
+      ('\'' (('\\' .) / !'\'' .)* '\'') /
+      ('%' ('q' / 'Q')? (curly_brackets / parens))
+    end
+    
+    rule curly_brackets
+      '{' (curly_brackets / '\}' / !'}' .)* '}'
+    end
+    
+    rule parens
+      '(' (parens         / '\)' / !')' .)* ')'
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/erb_tag.rb b/lib/erb_parser/erb_tag.rb
new file mode 100644
index 0000000..c34eb19
--- /dev/null
+++ b/lib/erb_parser/erb_tag.rb
@@ -0,0 +1,23 @@
+module ErbParser
+  class ErbTag
+    def comment?
+      @treetop_node.comment?
+    end
+    
+    def initialize(treetop_node)
+      @treetop_node = treetop_node
+    end
+    
+    def interpolated?
+      @treetop_node.interpolated?
+    end
+    
+    def ruby_code
+      @treetop_node.ruby_code
+    end
+    
+    def to_s
+      @treetop_node.text_value
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/nodes.rb b/lib/erb_parser/nodes.rb
new file mode 100644
index 0000000..343c8d7
--- /dev/null
+++ b/lib/erb_parser/nodes.rb
@@ -0,0 +1,27 @@
+module ErbParser
+  module ErbGrammar
+    module Text
+      def type
+        :text
+      end
+    end
+    
+    module ErbTag
+      def comment?
+        !number_sign.empty?
+      end
+      
+      def interpolated?
+        !equal_sign.empty?
+      end
+      
+      def ruby_code
+        _ruby_code.text_value
+      end
+      
+      def type
+        :erb_tag
+      end
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/parsed_erb.rb b/lib/erb_parser/parsed_erb.rb
new file mode 100644
index 0000000..3d5c9f2
--- /dev/null
+++ b/lib/erb_parser/parsed_erb.rb
@@ -0,0 +1,29 @@
+module ErbParser
+  class ParsedErb
+    # Accesses the parsed tokens as an array. Each element of the array is either a
+    # String, representing plain text, or an ErbTag.
+    def [](index)
+      @tokens[index]
+    end
+    
+    def initialize(treetop_ast) # treetop_ast: parse result whose #elements are the document's nodes
+      @treetop_ast = treetop_ast
+      @tokens = treetop_ast.elements.map do |elem|
+        case elem.type
+        when :text
+          elem.text_value # plain text is stored as a bare String
+        when :erb_tag
+          ErbTag.new elem # tags are wrapped so callers never touch the Treetop node directly
+        else
+          raise "Unexpected type: #{elem.type}" # only :text and :erb_tag nodes are handled here
+        end
+      end
+    end
+    
+    # Returns the array of parsed tokens.
+    attr_reader :tokens
+    
+    # Returns the raw Treetop AST.
+    attr_reader :treetop_ast
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/treetop_runner.rb b/lib/erb_parser/treetop_runner.rb
new file mode 100644
index 0000000..cb4b2d0
--- /dev/null
+++ b/lib/erb_parser/treetop_runner.rb
@@ -0,0 +1,18 @@
+Treetop.load File.join(File.dirname(__FILE__), 'erb_grammar')
+
+module ErbParser
+  # This module doesn't do much. It just provides some boilerplate code to invoke Treetop.
+  # The result is whatever Treetop returns.
+  module TreetopRunner
+    def self.run(str, options = {}) # options are forwarded unchanged to the parser's #parse call
+      treetop = ErbGrammarParser.new
+      if result = treetop.parse(str, options) # deliberate assignment: a falsy result means the parse failed
+        result
+      else
+        raise ParseError, treetop.failure_reason # surface the parser's own failure message
+      end
+    end
+    
+    class ParseError < RuntimeError; end # raised by .run when the input cannot be parsed
+  end
+end
\ No newline at end of file
diff --git a/lib/erb_parser/xml_transformer.rb b/lib/erb_parser/xml_transformer.rb
new file mode 100644
index 0000000..cbc3567
--- /dev/null
+++ b/lib/erb_parser/xml_transformer.rb
@@ -0,0 +1,52 @@
+require 'cgi'
+
+module ErbParser
+  module XmlTransformer
+    def self.transform(parsed_erb, options) # returns the transformed document as a single String
+      options = {
+        :tag          => 'erb',
+        :interp_attr  => {'interpolated' => 'true'},
+        :comment_attr => {'comment'      => 'true'}
+      }.merge(options) # caller-supplied options override these defaults
+      
+      parsed_erb.tokens.map do |elem|
+        case elem
+        when String
+          elem # plain text passes through untouched
+        when ErbTag
+          if elem.interpolated? # checked before comment?, so it takes precedence if both are set
+            if options[:interp_attr].is_a?(Hash) # any non-Hash value (e.g. false) disables the attribute
+              attrs = options[:interp_attr]
+            else
+              attrs = {}
+            end
+          elsif elem.comment?
+            if options[:comment_attr].is_a?(Hash) # any non-Hash value (e.g. false) disables the attribute
+              attrs = options[:comment_attr]
+            else
+              attrs = {}
+            end
+          else
+            attrs = {} # plain <% ... %> tags get no attributes
+          end
+          content_tag options[:tag], CGI.escape_html(elem.ruby_code), attrs # Ruby code is escaped before being wrapped
+        else
+          raise "Unexpected element: #{elem.class.name}" # tokens are expected to be Strings or ErbTags only
+        end
+      end.join
+    end
+  
+    def self.content_tag(name, contents, attrs = {}) # builds <name attrs>contents</name> as a String
+      if attrs.empty?
+        attrs_str = ''
+      else
+        attrs_str = ' ' + attrs.map do |key, val| # leading space separates the tag name from the first attribute
+          key = CGI.escape_html(key.to_s)
+          val = CGI.escape_html(val.to_s)
+          %Q(#{key}="#{val}")
+        end.join(' ')
+      end
+      '<' + name.to_s + attrs_str + '>' + contents.to_s + '</' + name.to_s + '>' # name and contents are inserted as-is, not escaped here
+    end
+  end
+end
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..ec46188
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,37 @@
+## Can ErbParser handle all valid Ruby code?
+
+No, it cannot. Ruby has a very complex syntax. In a library like this, it would be a fool's
+errand to try to handle every weird syntactic construct that could technically be
+considered valid Ruby. Instead, this library is designed to handle only the constructs
+that would commonly appear inside ERB tags. In other words, the basics of the language.
+
+Just avoid exotic syntactic constructs, and you should be fine. (You shouldn't do anything
+syntactically fancy in an ERB template anyway--it's bad coding style.) In particular, you
+must avoid Ruby's weirder string literals, such as the following:
+
+    %q!This is a valid string literal, but you must not use this syntax.!
+
+Also be wary of tricky escape sequences. If you absolutely must use unusual syntax, and it
+breaks ErbParser, consider moving the offending code into a class or module external to
+the ERB template.
+
+Nonetheless, the library *does* account for and allow the following string literal
+formats:
+
+    "string"
+    'string'
+    %q(string (string) string)
+    %Q(string (string) string)
+    %(string (string) string)
+    %q{string {string} string}
+    %Q{string {string} string}
+    %{string {string} string}
+
+This parser is *not* hardened against malicious input. But then, you shouldn't be
+accepting ERB as untrusted input anyway, because ERB allows arbitrary code execution.
+
+## What does ErbParser do with invalid ERB or Ruby code?
+
+If you pass code containing a syntax error, the parsing behavior is undefined. You may get
+an exception, or you may just get nonsensical results. It depends on the type of the
+syntax error.
\ No newline at end of file
diff --git a/test/api_test.rb b/test/api_test.rb
new file mode 100644
index 0000000..e2dc3d2
--- /dev/null
+++ b/test/api_test.rb
@@ -0,0 +1,57 @@
+require 'test_helper'
+
+# Test the gem's public API.
+class ApiTest < MiniTest::Unit::TestCase
+  def test_complex_document
+    str = %Q(
+      <p>The time is <%= Time.now.strftime("%m %d %Y") %>.</p>
+      
+      <% 5.times do |i| %>
+        <p><%= %Q{A string with {nested} brackets and a closing tag %>. } * i %></p>
+      <% end %> 
+      
+      <%# puts "This is a comment." %>
+    )
+    result = ErbParser.parse(str)
+
+    assert_equal '<p>The time is', result[0].strip
+    
+    assert_kind_of ErbParser::ErbTag, result[1]
+    assert_equal '<%= Time.now.strftime("%m %d %Y") %>', result[1].to_s
+    assert_equal ' Time.now.strftime("%m %d %Y") ', result[1].ruby_code
+    assert result[1].interpolated?
+    assert !result[1].comment?
+    
+    assert_equal '.</p>', result[2].strip
+    
+    assert_kind_of ErbParser::ErbTag, result[3]
+    assert_equal '<% 5.times do |i| %>', result[3].to_s
+    assert_equal ' 5.times do |i| ', result[3].ruby_code
+    assert !result[3].interpolated?
+    assert !result[3].comment?
+    
+    assert_equal '<p>', result[4].strip
+    
+    assert_kind_of ErbParser::ErbTag, result[5]
+    assert_equal '<%= %Q{A string with {nested} brackets and a closing tag %>. } * i %>', result[5].to_s
+    assert_equal ' %Q{A string with {nested} brackets and a closing tag %>. } * i ', result[5].ruby_code
+    assert result[5].interpolated?
+    assert !result[5].comment?
+    
+    assert_equal '</p>', result[6].strip
+    
+    assert_kind_of ErbParser::ErbTag, result[7]
+    assert_equal '<% end %>', result[7].to_s
+    assert_equal ' end ', result[7].ruby_code
+    assert !result[7].interpolated?
+    assert !result[7].comment?
+    
+    assert_equal '', result[8].strip
+    
+    assert_kind_of ErbParser::ErbTag, result[9]
+    assert_equal '<%# puts "This is a comment." %>', result[9].to_s
+    assert_equal ' puts "This is a comment." ', result[9].ruby_code
+    assert !result[9].interpolated?
+    assert result[9].comment?
+  end
+end
\ No newline at end of file
diff --git a/test/peg_test.rb b/test/peg_test.rb
new file mode 100644
index 0000000..05aaf2a
--- /dev/null
+++ b/test/peg_test.rb
@@ -0,0 +1,92 @@
+require 'test_helper'
+
+# Test the parsing expression grammar (PEG) directly.
+class PegTest < MiniTest::Unit::TestCase
+  include ErbParser
+  
+  STRING_LITERALS = [
+      '"foo (bar) \" baz"',
+      "'foo (bar) \\' baz'",
+      '%q(string (string) string)',
+      '%Q(string (string) string)',
+      '%(string (string) string)',
+      '%q{string {string} string}',
+      '%Q{string {string} string}',
+      '%{string {string} string}'
+    ]
+  
+  def test_string_literals
+    STRING_LITERALS.each do |literal|
+      result = TreetopRunner.run(literal, root: :string_literal)
+      assert_kind_of Treetop::Runtime::SyntaxNode, result
+    end
+  end
+  
+  def test_basic_tag
+    result = TreetopRunner.run 'Text 1 <% puts "hello world" %> Text 2'
+    
+    assert_equal :text, result.elements[0].type
+    assert_equal 'Text 1 ', result.elements[0].text_value
+    
+    assert_equal :erb_tag, result.elements[1].type
+    assert_equal '<% puts "hello world" %>', result.elements[1].text_value
+    assert_equal ' puts "hello world" ', result.elements[1].ruby_code
+    assert !result.elements[1].interpolated?
+    assert !result.elements[1].comment?
+    
+    assert_equal :text, result.elements[2].type
+    assert_equal ' Text 2', result.elements[2].text_value
+  end
+  
+  def test_interpolated_tag
+    result = TreetopRunner.run 'Text 1 <%= "hello world" %> Text 2'
+    
+    assert_equal :text, result.elements[0].type
+    assert_equal 'Text 1 ', result.elements[0].text_value
+    
+    assert_equal :erb_tag, result.elements[1].type
+    assert_equal '<%= "hello world" %>', result.elements[1].text_value
+    assert_equal ' "hello world" ', result.elements[1].ruby_code
+    assert result.elements[1].interpolated?
+    assert !result.elements[1].comment?
+    
+    assert_equal :text, result.elements[2].type
+    assert_equal ' Text 2', result.elements[2].text_value
+  end
+  
+  def test_comment_tag
+    result = TreetopRunner.run 'Text 1 <%# puts "hello world" %> Text 2'
+    
+    assert_equal :text, result.elements[0].type
+    assert_equal 'Text 1 ', result.elements[0].text_value
+    
+    assert_equal :erb_tag, result.elements[1].type
+    assert_equal '<%# puts "hello world" %>', result.elements[1].text_value
+    assert_equal ' puts "hello world" ', result.elements[1].ruby_code
+    assert !result.elements[1].interpolated?
+    assert result.elements[1].comment?
+    
+    assert_equal :text, result.elements[2].type
+    assert_equal ' Text 2', result.elements[2].text_value
+  end
+  
+  def test_tag_with_string_literal
+    STRING_LITERALS.each do |literal|
+      result = TreetopRunner.run("Text 1 <%= literal %> Text 2")
+      assert_kind_of Treetop::Runtime::SyntaxNode, result
+    end
+  end
+  
+  def test_tag_with_closing_tag_in_string_literal
+    result = TreetopRunner.run("Text 1 <%= %Q(Foo (bar) %> baz) %>")
+    
+    assert_equal :text, result.elements[0].type
+    assert_equal 'Text 1 ', result.elements[0].text_value
+    
+    assert_equal :erb_tag, result.elements[1].type
+    assert_equal '<%= %Q(Foo (bar) %> baz) %>', result.elements[1].text_value
+    assert_equal ' %Q(Foo (bar) %> baz) ', result.elements[1].ruby_code
+    assert result.elements[1].interpolated?
+    assert !result.elements[1].comment?
+  end
+end
\ No newline at end of file
diff --git a/test/test_helper.rb b/test/test_helper.rb
new file mode 100644
index 0000000..d219c93
--- /dev/null
+++ b/test/test_helper.rb
@@ -0,0 +1,6 @@
+require 'minitest/unit'
+require 'turn/autorun'
+
+$:.unshift(File.join(File.expand_path(File.dirname(__FILE__)), '../lib'))
+
+require 'erb_parser'
\ No newline at end of file
diff --git a/test/xml_transformer_test.rb b/test/xml_transformer_test.rb
new file mode 100644
index 0000000..953cf15
--- /dev/null
+++ b/test/xml_transformer_test.rb
@@ -0,0 +1,39 @@
+require 'test_helper'
+
+class XmlTransformerTest < MiniTest::Unit::TestCase
+  def test_replace_erb_tags_with_xml_elements
+    result = ErbParser.transform_xml(
+      '<p>Foo <%= "bar %>" & 1 %> baz. <% foo %> bar <%# baz %>.</p>'
+    )
+    assert_equal(
+      '<p>Foo <erb interpolated="true"> &quot;bar %&gt;&quot; &amp; 1 </erb> baz. ' +
+      '<erb> foo </erb> bar <erb comment="true"> baz </erb>.</p>',
+      result
+    )
+  end
+  
+  def test_override_tag_attr
+    result = ErbParser.transform_xml '<p>Foo <% bar %> baz.', :tag => 'erb-tag'
+    assert_equal '<p>Foo <erb-tag> bar </erb-tag> baz.', result
+  end
+  
+  def test_override_interp_attr_to_false
+    result = ErbParser.transform_xml '<p>Foo <%= bar %> baz.', :interp_attr => false
+    assert_equal '<p>Foo <erb> bar </erb> baz.', result
+  end
+  
+  def test_override_interp_attr_to_key_value_pair
+    result = ErbParser.transform_xml '<p>Foo <%= bar %> baz.', :interp_attr => {'int' => 'yes'}
+    assert_equal '<p>Foo <erb int="yes"> bar </erb> baz.', result
+  end
+  
+  def test_override_comment_attr_to_false
+    result = ErbParser.transform_xml '<p>Foo <%# bar %> baz.', :comment_attr => false
+    assert_equal '<p>Foo <erb> bar </erb> baz.', result
+  end
+  
+  def test_override_comment_attr_to_key_value_pair
+    result = ErbParser.transform_xml '<p>Foo <%# bar %> baz.', :comment_attr => {'comm' => 'yes'}
+    assert_equal '<p>Foo <erb comm="yes"> bar </erb> baz.', result
+  end
+end
\ No newline at end of file