Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 2fc7ec051a
Fetching contributors…

Cannot retrieve contributors at this time

file 116 lines (97 sloc) 2.128 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
require "open-uri"
require "hpricot"
require "string"
# Expands a URL pattern over a list of values and collects the images found on
# every resulting page (one ImageParser per generated URL).
class BatchImageParser
  # url_pattern - page URL, optionally containing a "{name}" placeholder.
  # key         - placeholder name. It is stored but not consulted: every
  #               "{...}" placeholder in the pattern is substituted.
  # value_range - comma-separated values and "a~b" ranges (e.g. "1~3,7"), or
  #               nil/blank to use url_pattern as a single literal URL.
  # options     - :css_path is forwarded to each ImageParser.
  def initialize(url_pattern, key, value_range, options={})
    @url_pattern = url_pattern
    @key = key
    @value_range = parse_values(value_range)
    @css_path = options[:css_path]
  end

  # Returns the images of all pages as one flat array, in page order.
  def images
    image_parsers.flat_map { |parser| parser.images }
  end

  # Turns a value specification into a flat list of values.
  #
  # "1~3,a,7" => [1, 2, 3, "a", "7"]: numeric ranges expand to Integers,
  # non-numeric ranges (e.g. "a~c") expand as String ranges, and single values
  # are kept as the Strings they were given as.
  #
  # Returns nil when value is nil or contains only whitespace.
  # Raises ArgumentError when a range starts with digits but its end is not a
  # decimal integer.
  def parse_values(value)
    return nil if value.nil? || value.strip.empty?

    value.split(",").flat_map do |token|
      bounds = token.split("~")
      next token if bounds.size < 2

      first, last = bounds
      if first =~ /\A\d+\z/
        # Force base 10: a bare Integer("08") raises and Integer("010") is 8,
        # because a leading zero selects octal.
        first = Integer(first, 10)
        last = Integer(last, 10)
      end
      Range.new(first, last).to_a
    end
  end

  # Title of the first page, passed through String#to_utf8 (not a core method;
  # presumably supplied by the "string" extension this file requires).
  def title
    image_parsers.first.title.to_utf8
  end

  # One ImageParser per generated URL, built once and cached. Constructing an
  # ImageParser fetches its page, so the first call performs the downloads.
  def image_parsers
    @image_parsers ||= urls.map { |page_url| ImageParser.new(page_url, :css_path => @css_path) }
  end

  private

  # URLs to visit: the bare pattern when no values were given, otherwise the
  # pattern expanded once per value.
  def urls
    @urls ||= if @value_range.nil?
      [@url_pattern]
    else
      @value_range.map { |value| apply_url_pattern(value) }
    end
  end

  # Substitutes value for every "{name}" placeholder in the pattern. The block
  # form of gsub inserts the value literally, so a backslash in the value is
  # not interpreted as a back-reference.
  def apply_url_pattern(value)
    @url_pattern.gsub(/\{\w+\}/) { value.to_s }
  end

end

# Downloads a single page, parses it with Hpricot and exposes the image
# elements selected by a CSS path.
class ImageParser
  attr_reader :doc, :url

  # url     - address of the page; it is fetched immediately.
  # options - :css_path selects the image elements (defaults to "img").
  #
  # Raises RuntimeError when the page cannot be fetched (see #get_url).
  def initialize(url, options={})
    @url = url
    @doc = Hpricot(get_url(url))
    @css_path = options[:css_path] || "img"
  end

  # Text of the page's <title> element.
  def title
    @doc.at("title").inner_text
  end

  # Elements matching the CSS path, wrapped as ImageElement and sorted in
  # ascending ImageElement#<=> order. Computed once and cached.
  def images
    @images ||= @doc.search(@css_path).map { |node| ImageElement.new(node, :host => host) }.sort
  end

  # "scheme://host[:port]" prefix of the page URL. The port is included only
  # when it differs from the scheme's default, so neither http on 80 nor https
  # on 443 gets a redundant suffix.
  def host
    uri = URI.parse(url)
    port = uri.port && uri.port != uri.default_port ? ":#{uri.port}" : ""
    "#{uri.scheme}://#{uri.host}#{port}"
  end

  # Opens url and returns the resulting IO-like object.
  #
  # Raises RuntimeError naming the URL and the underlying error message on
  # failure. Only StandardError is translated, so Interrupt, SystemExit and
  # similar process-level exceptions propagate untouched.
  #
  # NOTE(review): Kernel#open treats a string starting with "|" as a command
  # to run, and opening URLs through it relies on open-uri's Kernel#open hook,
  # which newer Ruby versions no longer provide. Confirm where url comes from
  # before exposing this to untrusted input.
  def get_url(url)
    open(url)
  rescue StandardError => e
    raise "Error when parsing #{url}, #{e.message}"
  end

end

# Wraps one image element (anything answering #[] with attribute names) and
# resolves its source URL against the host of the page it came from.
class ImageElement
  attr_reader :host

  # ele     - the underlying element; attributes are read through ele[key].
  # options - :host is the "scheme://host[:port]" prefix used for
  #           non-absolute sources.
  def initialize(ele, options={})
    @ele = ele
    @host = options[:host]
  end

  # Orders elements by size_factor (width + height).
  def <=>(other)
    self.size_factor <=> other.size_factor
  end

  # Source URL of the image.
  #
  # - nil when the element has no src attribute
  # - unchanged when src already carries a URI scheme ("http:", "https:",
  #   "data:", ...)
  # - protocol-relative sources ("//cdn/x.png") take the scheme of host
  # - anything else is appended to host, with exactly one "/" between them
  #
  # Only the host is known here, so a path without a leading "/" is resolved
  # against the site root, not against the page's directory.
  def src
    s = self[:src]
    return s if s.nil? || s =~ /\A[a-z][a-z0-9+.\-]*:/i
    return "#{host.to_s[/\A[^:\/]+:/]}#{s}" if s.start_with?("//")

    s.start_with?("/") ? "#{host}#{s}" : "#{host}/#{s}"
  end

  # Reads an attribute from the wrapped element.
  def [](key)
    @ele[key]
  end

  # Sum of the width and height attributes; each is read with to_i, so a
  # missing or non-numeric attribute counts as 0.
  def size_factor
    self[:width].to_i + self[:height].to_i
  end
end
Something went wrong with that request. Please try again.