Skip to content

Commit

Permalink
Merge pull request #460 from LightGuard/switch_from_rexml_to_oga
Browse files Browse the repository at this point in the history
Switching from REXML to Oga
  • Loading branch information
LightGuard committed Apr 9, 2015
2 parents 782bddd + 9e5e1b6 commit 33415cb
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 25 deletions.
2 changes: 2 additions & 0 deletions awestruct.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ Haml and Markdown filters are touchy things. Redcarpet or Rdiscount work well if
s.add_dependency 'git', '~> 1.2.6'
s.add_dependency 'guard-livereload', '~> 2.1.2'
s.add_dependency 'colorize', '~> 0.7.1'
# Oga is the parser that lib/awestruct/context_helper.rb requires and calls
# (Oga.parse_html) in place of REXML. NOTE(review): this line and the blank
# line after it look like the hunk's two additions - confirm against the commit.
s.add_dependency 'oga', '~> 0.3'

s.add_dependency 'parallel', '> 1.1.1'

s.add_development_dependency 'nokogiri', '~> 1.5.10'
Expand Down
35 changes: 15 additions & 20 deletions lib/awestruct/context_helper.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
require 'rexml/document'
require 'oga'

module Awestruct
module ContextHelper
include REXML

def html_to_text(str)
str.gsub( /<[^>]+>/, '' ).gsub( /&nbsp;/, ' ' )
Expand Down Expand Up @@ -39,27 +38,23 @@ def summarize(text, numwords=20, ellipsis='...')
end

# Rewrites the href of <a> and <link> elements and the src of <img> elements
# found in +text+ by passing each value through fix_url together with
# +base_url+, then returns the re-serialized markup.
#
# NOTE(review): this listing appears to be a rendered diff whose +/- markers
# were lost, so lines of the old REXML implementation and the new Oga
# implementation are interleaved and the span is not valid Ruby as shown. The
# "old"/"new" labels below are inferred from the library each line calls and
# from the hunk's addition/deletion counts - confirm against the commit.
def fully_qualify_urls(base_url, text)
# Old (REXML): parse the text and force double-quoted attribute output.
doc = Document.new text
doc.context[:attribute_quote] = :quote # Set double-quote as the attribute value delimiter
# New (Oga): parse the text as HTML.
doc = Oga.parse_html text

# Old (REXML): one XPath pass per tag name. Only the //a pass checks that the
# attribute exists; the //link and //img passes call fix_url unconditionally.
XPath.each(doc, "//a") do |a|
a.attributes['href'] = fix_url( base_url, a.attributes['href'] ) if a.attributes['href']
end

XPath.each(doc, "//link") do |link|
link.attributes['href'] = fix_url( base_url, link.attributes['href'] )
end

XPath.each(doc, "//img") do |img|
img.attributes['src'] = fix_url( base_url, img.attributes['src'] )
# New (Oga): a single each_node traversal. Only nodes that are
# Oga::XML::Element and answer html? are considered, and every branch rewrites
# its attribute only when elem.get returns a value for it.
doc.each_node do |elem|
if (elem.is_a?(Oga::XML::Element) && elem.html?)
case elem.name
when 'a'
elem.set 'href', fix_url(base_url, elem.get('href')) if elem.get('href')
when 'link'
elem.set 'href', fix_url(base_url, elem.get('href')) if elem.get('href')
when 'img'
elem.set 'src', fix_url(base_url, elem.get('src')) if elem.get('src')
end
end
end

# Old (REXML): on Ruby 1.8 return doc.to_s as-is; otherwise return doc.to_s
# after relabelling it with the input's encoding when the two differ.
if RUBY_VERSION.start_with? '1.8'
doc.to_s
else
doc.to_s.tap do |d|
d.force_encoding(text.encoding) if d.encoding != text.encoding
end
# New (Oga): serialize with doc.to_xml and apply the same encoding relabel;
# the RUBY_VERSION branch is no longer present.
doc.to_xml.tap do |d|
d.force_encoding(text.encoding) if d.encoding != text.encoding
end
end

Expand Down
10 changes: 5 additions & 5 deletions spec/awestruct/context_helper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -76,21 +76,21 @@ class Tester
# Examples for fully_qualify_urls, each called with the base URL
# 'http://foobar.com'.
#
# NOTE(review): this listing appears to be a rendered diff without +/-
# markers. The file header reports 5 additions and 5 deletions, which matches
# the five old/new line pairs annotated below - confirm against the commit.
describe "fully_qualify_urls" do
it "should fix anchor tags" do
str = "<a href='/foo'>foobar</a>"
# Old/new pair: both lines expect the same string; only the literal style
# changes (escaped double-quoted string -> %q).
@tester.fully_qualify_urls('http://foobar.com', str).should == "<a href=\"http://foobar.com/foo\">foobar</a>"
@tester.fully_qualify_urls('http://foobar.com', str).should == %q(<a href="http://foobar.com/foo">foobar</a>)
end

it "should fix link tags" do
# Old input and expectation: self-closed <link ... /> in, "/>" with no
# preceding space out.
str = "<link href='/foo' />"
@tester.fully_qualify_urls('http://foobar.com', str).should == "<link href=\"http://foobar.com/foo\"/>"
# New input and expectation: unclosed <link ...> in, " />" (with a space) out.
str = "<link href='/foo'>"
@tester.fully_qualify_urls('http://foobar.com', str).should == %q(<link href="http://foobar.com/foo" />)
end

it "should fix image tags" do
str = "<img src='/foo' />"
# Old/new pair: the new expectation has a space before "/>".
@tester.fully_qualify_urls('http://foobar.com', str).should == "<img src=\"http://foobar.com/foo\"/>"
@tester.fully_qualify_urls('http://foobar.com', str).should == %q(<img src="http://foobar.com/foo" />)
end

it "should leave anchor tags with no href attribute (for page anchors) unchanged" do
# Old/new pair: the same input string, re-spelled with %q.
str = "<a target=\"#foo\">foobar</a>"
str = %q(<a target="#foo">foobar</a>)
@tester.fully_qualify_urls('http://foobar.com', str).should == str
end
end
Expand Down

0 comments on commit 33415cb

Please sign in to comment.