Skip to content

Commit

Permalink
Add a new Liquid filter rebase_html
Browse files Browse the repository at this point in the history
  • Loading branch information
knu committed Oct 18, 2016
1 parent 3099405 commit 2890c9e
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 0 deletions.
5 changes: 5 additions & 0 deletions app/concerns/liquid_interpolatable.rb
Expand Up @@ -189,6 +189,11 @@ def uri_expand(url, limit = 5)
url
end

# Liquid filter: rewrite the URIs found in the attributes of an HTML
# fragment so that they are resolved against +base_uri+.
#
# This is best-effort: when the rewrite raises any StandardError the
# original +input+ is returned untouched.
def rebase_html(input, base_uri)
  Utils.rebase_html(input, base_uri)
rescue StandardError
  input
end

# Unescape (basic) HTML entities in a string
#
# This currently decodes the following entities only: "&apos;",
Expand Down
89 changes: 89 additions & 0 deletions lib/utils.rb
Expand Up @@ -170,4 +170,93 @@ def self.if_present(string, method)
nil
end
end

# Helpers for rewriting URI-valued attributes in HTML documents and
# fragments.  Parsing is delegated to Nokogiri.
module HTMLTransformer
  # Value formats an attribute listed in URI_ATTRIBUTES may have.
  SINGLE = 1          # exactly one URI
  MULTIPLE = 2        # whitespace-separated URIs
  COMMA_SEPARATED = 3 # comma-separated URIs
  SRCSET = 4          # comma-separated "URI [descriptor]" candidates

  # element name => { attribute name => value format }
  URI_ATTRIBUTES = {
    'a' => { 'href' => SINGLE },
    'applet' => { 'archive' => COMMA_SEPARATED, 'codebase' => SINGLE },
    'area' => { 'href' => SINGLE },
    'audio' => { 'src' => SINGLE },
    'base' => { 'href' => SINGLE },
    'blockquote' => { 'cite' => SINGLE },
    'body' => { 'background' => SINGLE },
    'button' => { 'formaction' => SINGLE },
    'command' => { 'icon' => SINGLE },
    'del' => { 'cite' => SINGLE },
    'embed' => { 'src' => SINGLE },
    'form' => { 'action' => SINGLE },
    'frame' => { 'longdesc' => SINGLE, 'src' => SINGLE },
    'head' => { 'profile' => SINGLE },
    'html' => { 'manifest' => SINGLE },
    'iframe' => { 'longdesc' => SINGLE, 'src' => SINGLE },
    'img' => { 'longdesc' => SINGLE, 'src' => SINGLE, 'srcset' => SRCSET, 'usemap' => SINGLE },
    'input' => { 'formaction' => SINGLE, 'src' => SINGLE, 'usemap' => SINGLE },
    'ins' => { 'cite' => SINGLE },
    'link' => { 'href' => SINGLE },
    'object' => { 'archive' => MULTIPLE, 'classid' => SINGLE, 'codebase' => SINGLE, 'data' => SINGLE, 'usemap' => SINGLE },
    'q' => { 'cite' => SINGLE },
    'script' => { 'src' => SINGLE },
    'source' => { 'src' => SINGLE, 'srcset' => SRCSET },
    'video' => { 'poster' => SINGLE, 'src' => SINGLE },
  }

  # XPath selecting every element whose name is a key of URI_ATTRIBUTES.
  URI_ELEMENTS_XPATH = '//*[%s]' % URI_ATTRIBUTES.keys.map { |name| "name()='#{name}'" }.join(' or ')

  module_function

  # Parses +html+, yields the parsed document to the block so it can be
  # modified in place, and returns the result serialized as a string.
  #
  # Three input shapes are distinguished by how +html+ starts:
  # * an XML declaration or DOCTYPE: parsed with Nokogiri.parse and
  #   serialized whole;
  # * an <html>, <head> or <body> tag: parsed as an HTML document and
  #   serialized from that element onwards;
  # * anything else: treated as a body fragment; only the nodes inside
  #   <body> are serialized.
  #
  # Raises ArgumentError when no block is given.
  def transform(html, &block)
    block or raise ArgumentError, 'block must be given'

    case html
    when /\A\s*(?:<\?xml[\s?]|<!DOCTYPE\s)/i
      doc = Nokogiri.parse(html)
      yield doc
      doc.to_s
    when /\A\s*<(html|head|body)[\s>]/i
      # Libxml2 automatically adds DOCTYPE and <html>, so we need to
      # skip them.
      #
      # The match above is case-insensitive while the HTML parser
      # lower-cases element names, so the captured name is downcased
      # before it is used in XPath; otherwise "<HTML>" would find no
      # node and the call below would fail on nil.
      element_name = Regexp.last_match(1).downcase
      doc = Nokogiri::HTML::Document.parse(html)
      yield doc
      doc.at_xpath("//#{element_name}").xpath('self::node() | following-sibling::node()').to_s
    else
      doc = Nokogiri::HTML::Document.parse("<html><body>#{html}")
      yield doc
      doc.xpath("/html/body/node()").to_s
    end
  end

  # Returns +html+ with every URI found in a known URI attribute (see
  # URI_ATTRIBUTES) replaced by the value the block returns for it.
  #
  # Raises ArgumentError when no block is given.
  def replace_uris(html, &block)
    block or raise ArgumentError, 'block must be given'

    transform(html) { |doc|
      doc.xpath(URI_ELEMENTS_XPATH).each { |element|
        uri_attrs = URI_ATTRIBUTES[element.name] or next
        uri_attrs.each { |name, format|
          attr = element.attribute(name) or next
          attr.value = rewrite_attribute_value(attr.value, format, &block)
        }
      }
    }
  end

  # Returns +value+ with each URI it contains replaced by the block's
  # result.  +format+ is one of SINGLE, MULTIPLE, COMMA_SEPARATED or
  # SRCSET and decides how URIs are located inside +value+; any other
  # format leaves +value+ unchanged.
  #
  # Raises ArgumentError when no block is given.
  def rewrite_attribute_value(value, format, &block)
    block or raise ArgumentError, 'block must be given'

    case format
    when SINGLE
      block.call(value.strip)
    when MULTIPLE
      value.gsub(/\S+/) { |uri| block.call(uri) }
    when COMMA_SEPARATED
      # A URI ends at the next comma or whitespace, so "a.jar,b.jar"
      # is two entries even without a space after the comma.
      value.gsub(/((?:\A|,)\s*)([^\s,]+)/) {
        lead, uri = Regexp.last_match(1), Regexp.last_match(2)
        lead + block.call(uri)
      }
    when SRCSET
      # A candidate URL may contain commas but never ends with one: a
      # trailing comma is the separator of a descriptor-less candidate
      # ("a.png, b.png 2x") and must stay outside the URL so that the
      # following candidate is still recognized.
      value.gsub(/((?:\A|,)\s*)(\S*[^\s,])/) {
        lead, uri = Regexp.last_match(1), Regexp.last_match(2)
        lead + block.call(uri)
      }
    else
      value
    end
  end
end

# Returns +html+ with every URI attribute resolved against +base_uri+.
#
# Both the base and each URI found in the markup go through
# normalize_uri before being merged; the merged URI is written back
# as a string.
def self.rebase_html(html, base_uri)
  base = normalize_uri(base_uri)

  HTMLTransformer.replace_uris(html) do |reference|
    target = normalize_uri(reference)
    base.merge(target).to_s
  end
end
end
38 changes: 38 additions & 0 deletions spec/concerns/liquid_interpolatable_spec.rb
Expand Up @@ -323,4 +323,42 @@ def ensure_safety(obj)
end
end
end

# Exercises the rebase_html Liquid filter end to end through an agent's
# interpolation of its 'template' option.
describe 'rebase_html' do
let(:agent) { Agents::InterpolatableAgent.new(name: "test") }

# Input markup: each item has a path-relative href ("downloads/...") and
# root-relative src/srcset values ("/images/...").
let(:fragment) { <<HTML }
<ul>
<li>
<a href="downloads/file1"><img src="/images/iconA.png" srcset="/images/iconA.png 1x, /images/iconA@2x.png 2x">file1</a>
</li>
<li>
<a href="downloads/file2"><img src="/images/iconA.png" srcset="/images/iconA.png 1x, /images/iconA@2x.png 2x">file2</a>
</li>
<li>
<a href="downloads/file3"><img src="/images/iconB.png" srcset="/images/iconB.png 1x, /images/iconB@2x.png 2x">file3</a>
</li>
</ul>
HTML

# Expected output for the base http://example.com/support/files.html:
# hrefs land under /support/, root-relative image URLs under the host
# root, and srcset descriptors (1x, 2x) are kept as they are.
let(:replaced_fragment) { <<HTML }
<ul>
<li>
<a href="http://example.com/support/downloads/file1"><img src="http://example.com/images/iconA.png" srcset="http://example.com/images/iconA.png 1x, http://example.com/images/iconA@2x.png 2x">file1</a>
</li>
<li>
<a href="http://example.com/support/downloads/file2"><img src="http://example.com/images/iconA.png" srcset="http://example.com/images/iconA.png 1x, http://example.com/images/iconA@2x.png 2x">file2</a>
</li>
<li>
<a href="http://example.com/support/downloads/file3"><img src="http://example.com/images/iconB.png" srcset="http://example.com/images/iconB.png 1x, http://example.com/images/iconB@2x.png 2x">file3</a>
</li>
</ul>
HTML

# The fragment is supplied via the interpolation context and piped
# through the filter with the base URI as its argument.
it 'rebases relative URLs in a fragment' do
agent.interpolation_context['content'] = fragment
agent.options['template'] = "{{ content | rebase_html: 'http://example.com/support/files.html' }}"
expect(agent.interpolated['template']).to eq(replaced_fragment)
end
end
end

0 comments on commit 2890c9e

Please sign in to comment.