Merge pull request #460 from LightGuard/switch_from_rexml_to_oga

Switching from REXML to Oga
awestruct · Apr 9, 2015 · 33415cb
2 parents 782bddd + 9e5e1b6
commit 33415cb
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 25 deletions.
diff --git a/awestruct.gemspec b/awestruct.gemspec
@@ -43,6 +43,8 @@ Haml and Markdown filters are touchy things. Redcarpet or Rdiscount work well if
   s.add_dependency 'git', '~> 1.2.6'
   s.add_dependency 'guard-livereload', '~> 2.1.2'
   s.add_dependency 'colorize', '~> 0.7.1'
+  s.add_dependency 'oga', '~> 0.3'
+
   s.add_dependency 'parallel', '> 1.1.1'
 
   s.add_development_dependency 'nokogiri', '~> 1.5.10'

diff --git a/lib/awestruct/context_helper.rb b/lib/awestruct/context_helper.rb
@@ -1,8 +1,7 @@
-require 'rexml/document'
+require 'oga'
 
 module Awestruct
   module ContextHelper
-    include REXML
 
     def html_to_text(str)
       str.gsub( /<[^>]+>/, '' ).gsub( /&nbsp;/, ' ' )
@@ -39,27 +38,23 @@ def summarize(text, numwords=20, ellipsis='...')
     end
 
     def fully_qualify_urls(base_url, text)
-      doc = Document.new text
-      doc.context[:attribute_quote] = :quote  # Set double-quote as the attribute value delimiter
+      doc = Oga.parse_html text
 
-      XPath.each(doc, "//a") do |a|
-        a.attributes['href'] = fix_url( base_url, a.attributes['href'] ) if a.attributes['href']
-      end
-
-      XPath.each(doc, "//link") do |link|
-        link.attributes['href'] = fix_url( base_url, link.attributes['href'] )
-      end
-
-      XPath.each(doc, "//img") do |img|
-        img.attributes['src'] = fix_url( base_url, img.attributes['src'] )
+      doc.each_node do |elem|
+        if (elem.is_a?(Oga::XML::Element) && elem.html?)
+          case elem.name
+          when 'a'
+            elem.set 'href', fix_url(base_url, elem.get('href')) if elem.get('href')
+          when 'link'
+            elem.set 'href', fix_url(base_url, elem.get('href')) if elem.get('href')
+          when 'img'
+            elem.set 'src', fix_url(base_url, elem.get('src')) if elem.get('src')
+          end
+        end
       end
 
-      if RUBY_VERSION.start_with? '1.8'
-        doc.to_s
-      else
-        doc.to_s.tap do |d| 
-          d.force_encoding(text.encoding) if d.encoding != text.encoding 
-        end 
+      doc.to_xml.tap do |d|
+        d.force_encoding(text.encoding) if d.encoding != text.encoding
       end
     end
 

diff --git a/spec/awestruct/context_helper_spec.rb b/spec/awestruct/context_helper_spec.rb
@@ -76,21 +76,21 @@ class Tester
   describe "fully_qualify_urls" do
     it "should fix anchor tags" do
       str = "<a href='/foo'>foobar</a>"
-      @tester.fully_qualify_urls('http://foobar.com', str).should == "<a href=\"http://foobar.com/foo\">foobar</a>"
+      @tester.fully_qualify_urls('http://foobar.com', str).should == %q(<a href="http://foobar.com/foo">foobar</a>)
     end
 
     it "should fix link tags" do
-      str = "<link href='/foo' />"
-      @tester.fully_qualify_urls('http://foobar.com', str).should == "<link href=\"http://foobar.com/foo\"/>"
+      str = "<link href='/foo'>"
+      @tester.fully_qualify_urls('http://foobar.com', str).should == %q(<link href="http://foobar.com/foo" />)
     end
 
     it "should fix image tags" do
       str = "<img src='/foo' />"
-      @tester.fully_qualify_urls('http://foobar.com', str).should == "<img src=\"http://foobar.com/foo\"/>"
+      @tester.fully_qualify_urls('http://foobar.com', str).should == %q(<img src="http://foobar.com/foo" />)
     end
 
     it "should leave anchor tags with no href attribute (for page anchors) unchanged" do
-      str = "<a target=\"#foo\">foobar</a>"
+      str = %q(<a target="#foo">foobar</a>)
       @tester.fully_qualify_urls('http://foobar.com', str).should == str
     end
   end