From 2890c9e89241d3a75023f2b85f68008d2b4964c1 Mon Sep 17 00:00:00 2001 From: Akinori MUSHA Date: Wed, 19 Oct 2016 05:14:33 +0900 Subject: [PATCH] Add a new Liquid filter `rebase_html` --- app/concerns/liquid_interpolatable.rb | 5 ++ lib/utils.rb | 89 +++++++++++++++++++++ spec/concerns/liquid_interpolatable_spec.rb | 38 +++++++++ 3 files changed, 132 insertions(+) diff --git a/app/concerns/liquid_interpolatable.rb b/app/concerns/liquid_interpolatable.rb index dde867a671..4f75092b40 100644 --- a/app/concerns/liquid_interpolatable.rb +++ b/app/concerns/liquid_interpolatable.rb @@ -189,6 +189,11 @@ def uri_expand(url, limit = 5) url end + # Rebase URIs contained in attributes in a given HTML fragment + def rebase_html(input, base_uri) + Utils.rebase_html(input, base_uri) rescue input + end + # Unescape (basic) HTML entities in a string # # This currently decodes the following entities only: "'", diff --git a/lib/utils.rb b/lib/utils.rb index c192cea276..cbdee4c4da 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -170,4 +170,93 @@ def self.if_present(string, method) nil end end + + module HTMLTransformer + SINGLE = 1 + MULTIPLE = 2 + COMMA_SEPARATED = 3 + SRCSET = 4 + + URI_ATTRIBUTES = { + 'a' => { 'href' => SINGLE }, + 'applet' => { 'archive' => COMMA_SEPARATED, 'codebase' => SINGLE }, + 'area' => { 'href' => SINGLE }, + 'audio' => { 'src' => SINGLE }, + 'base' => { 'href' => SINGLE }, + 'blockquote' => { 'cite' => SINGLE }, + 'body' => { 'background' => SINGLE }, + 'button' => { 'formaction' => SINGLE }, + 'command' => { 'icon' => SINGLE }, + 'del' => { 'cite' => SINGLE }, + 'embed' => { 'src' => SINGLE }, + 'form' => { 'action' => SINGLE }, + 'frame' => { 'longdesc' => SINGLE, 'src' => SINGLE }, + 'head' => { 'profile' => SINGLE }, + 'html' => { 'manifest' => SINGLE }, + 'iframe' => { 'longdesc' => SINGLE, 'src' => SINGLE }, + 'img' => { 'longdesc' => SINGLE, 'src' => SINGLE, 'srcset' => SRCSET, 'usemap' => SINGLE }, + 'input' => { 'formaction' => SINGLE, 'src' => SINGLE, 'usemap' => SINGLE }, + 'ins' => { 'cite' => SINGLE }, + 'link' => { 'href' => SINGLE }, + 'object' => { 'archive' => MULTIPLE, 'classid' => SINGLE, 'codebase' => SINGLE, 'data' => SINGLE, 'usemap' => SINGLE }, + 'q' => { 'cite' => SINGLE }, + 'script' => { 'src' => SINGLE }, + 'source' => { 'src' => SINGLE, 'srcset' => SRCSET }, + 'video' => { 'poster' => SINGLE, 'src' => SINGLE }, + } + + URI_ELEMENTS_XPATH = '//*[%s]' % URI_ATTRIBUTES.keys.map { |name| "name()='#{name}'" }.join(' or ') + + module_function + + def transform(html, &block) + block or raise ArgumentError, 'block must be given' + + case html + when /\A\s*(?:<\?xml[\s?]|]/i + # Libxml2 automatically adds DOCTYPE and , so we need to + # skip them. + element_name = $1 + doc = Nokogiri::HTML::Document.parse(html) + yield doc + doc.at_xpath("//#{element_name}").xpath('self::node() | following-sibling::node()').to_s + else + doc = Nokogiri::HTML::Document.parse("#{html}") + yield doc + doc.xpath("/html/body/node()").to_s + end + end + + def replace_uris(html, &block) + block or raise ArgumentError, 'block must be given' + + transform(html) { |doc| + doc.xpath(URI_ELEMENTS_XPATH).each { |element| + uri_attrs = URI_ATTRIBUTES[element.name] or next + uri_attrs.each { |name, format| + attr = element.attribute(name) or next + case format + when SINGLE + attr.value = block.call(attr.value.strip) + when MULTIPLE + attr.value = attr.value.gsub(/(\S+)/) { block.call($1) } + when COMMA_SEPARATED, SRCSET + attr.value = attr.value.gsub(/((?:\A|,)\s*)(\S+)/) { $1 + block.call($2) } + end + } + } + } + end + end + + def self.rebase_html(html, base_uri) + base_uri = normalize_uri(base_uri) + HTMLTransformer.replace_uris(html) { |url| + base_uri.merge(normalize_uri(url)).to_s + } + end end diff --git a/spec/concerns/liquid_interpolatable_spec.rb b/spec/concerns/liquid_interpolatable_spec.rb index cc7bbbf35f..adea241c1b 100644 --- a/spec/concerns/liquid_interpolatable_spec.rb +++ b/spec/concerns/liquid_interpolatable_spec.rb @@ -323,4 +323,42 @@ def ensure_safety(obj) end end end + + describe 'rebase_html' do + let(:agent) { Agents::InterpolatableAgent.new(name: "test") } + + let(:fragment) { < +
  • + file1 +
  • +
  • + file2 +
  • +
  • + file3 +
  • + +HTML + + let(:replaced_fragment) { < +
  • + file1 +
  • +
  • + file2 +
  • +
  • + file3 +
  • + +HTML + + it 'rebases relative URLs in a fragment' do + agent.interpolation_context['content'] = fragment + agent.options['template'] = "{{ content | rebase_html: 'http://example.com/support/files.html' }}" + expect(agent.interpolated['template']).to eq(replaced_fragment) + end + end end