Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 298 lines (230 sloc) 7.503 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
require 'uri'
require 'fileutils'

require 'big_sitemap/builder'

# Builds XML sitemap files plus a sitemap index underneath a document
# directory and can notify search engines about the resulting index.
#
# Typical use is the block form of BigSitemap.generate:
#
#   BigSitemap.generate(:base_url => 'http://example.com', :document_root => 'public') do
#     add '/about'
#   end
class BigSitemap
  # Option defaults, merged underneath the options given to #initialize.
  # Deliberately left unfrozen so code that adjusts the defaults at runtime
  # keeps working.
  DEFAULTS = {
    :max_per_sitemap => Builder::MAX_URLS,
    :document_path => '/',
    :gzip => true,

    # Opinionated
    :ping_google => true,
    :ping_yahoo => false, # needs :yahoo_app_id
    :ping_bing => false,
    :ping_ask => false,
    :ping_yandex => false
  }

  class << self
    # Creates a sitemap instance, evaluates +block+ against the class (so a
    # bare +add+ inside the block reaches the private class-level +add+
    # below) and then generates all files while holding the lock file.
    #
    # options - Hash accepted by BigSitemap#initialize. It is also handed to
    #           BigSitemap#generate, which reads :partial_update from it.
    #
    # Returns the sitemap instance, or nil when the lock could not be taken.
    def generate(options={}, &block)
      @sitemap = self.new(options)

      @sitemap.first_id_of_last_sitemap = first_id_of_last_sitemap

      # instance_eval raises ArgumentError when it is handed no block at all.
      instance_eval(&block) if block

      @sitemap.with_lock do
        @sitemap.generate(options)
      end
    end

    private

    # Scans the sitemap files already present in the document directory and
    # returns the largest integer found after "sitemap_" in their file names
    # (0 for names without a numeric part, nil when there are no files).
    def first_id_of_last_sitemap
      Dir[@sitemap.dir_files].map do |file|
        # Match on the basename only, so directory names cannot interfere.
        File.basename(file.to_s).scan(/sitemap_(.+)\.xml/).flatten.last.to_i
      end.sort.last
    end

    # Registers +path+ (with builder +options+) on the sitemap currently
    # being generated. Only reachable from inside the generate block.
    def add(path, options={})
      @sitemap.add_path(path, options)
    end
  end

  # Number handed to the builders as :start_part_id (see #with_sitemap).
  attr_accessor :first_id_of_last_sitemap

  # options - Hash merged over DEFAULTS. Notable keys:
  #           :base_url        - String base URL; or give :url_options, a Hash
  #                              for URI::Generic.build (scheme defaults to
  #                              "http").
  #           :document_root   - directory that :document_path is joined onto.
  #           :document_full   - complete output directory; when given,
  #                              :document_root is not required.
  #           :document_path   - defaults to '/'.
  #           :url_path        - defaults to :document_path.
  #           :max_per_sitemap - must be greater than 1.
  #
  # Raises ArgumentError when a required option is missing or invalid.
  def initialize(options={})
    @options = DEFAULTS.merge options

    if @options[:max_per_sitemap] <= 1
      raise ArgumentError, '":max_per_sitemap" must be greater than 1'
    end

    if @options[:url_options] && !@options[:base_url]
      @options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
    end

    unless @options[:base_url]
      raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
    end
    @options[:url_path] ||= @options[:document_path]

    # An explicit :document_full wins; otherwise derive it from the root.
    unless @options[:document_full]
      unless @options[:document_root]
        raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
      end
      @options[:document_full] = File.join(@options[:document_root], @options[:document_path])
    end

    # mkdir_p also creates missing parent directories.
    FileUtils.mkdir_p(@options[:document_full]) unless File.exist?(@options[:document_full])

    @sources = []
    @models = []
    @sitemap_files = []
  end

  # Returns the directory the sitemap files are written to.
  def document_full
    @options[:document_full]
  end

  # Deprecated: registers a model class as a sitemap source.
  #
  # NOTE(review): relies on a +table_name+ helper that is not defined in this
  # file - confirm where it comes from before using this code path.
  #
  # Returns self.
  def add(model, options={})
    warn 'BigSitemap#add is deprecated. Please use BigSitemap.generate and call add inside the block (in BigSitemap 1.0.0+). You will have to perform the find and generate the path for each record yourself.'
    @models << model

    # How many times this model was registered before the current call.
    filename_suffix = @models.count(model) - 1

    options[:path] ||= table_name(model)
    options[:filename] ||= file_name(model)
    options[:primary_column] ||= 'id' if model.new.respond_to?('id')
    options[:partial_update] = @options[:partial_update] && options[:partial_update] != false

    # Build a new string so a caller-supplied filename is never mutated.
    unless filename_suffix == 0
      options[:filename] = "#{options[:filename]}_#{filename_suffix}"
    end

    @sources << [model, options.dup]

    self
  end

  # Queues +path+ together with its builder +options+ for #add_urls.
  #
  # Returns self.
  def add_path(path, options)
    @paths ||= []
    @paths << [path, options]
    self
  end

  # Deprecated: records a static page entry.
  #
  # Returns self.
  def add_static(url, time = nil, frequency = nil, priority = nil)
    warn 'BigSitemap#add_static is deprecated. Please use BigSitemap#add_path instead'
    @static_pages ||= []
    @static_pages << [url, time, frequency, priority]
    self
  end

  # Runs the block while holding the generator lock file and removes the
  # lock afterwards, even when the block raises.
  #
  # When the lock file already exists the block is NOT run; a notice is
  # printed to STDERR if $VERBOSE is set.
  #
  # Returns the block's value, or nil when the lock could not be taken.
  def with_lock
    begin
      lock!
    rescue Errno::EEXIST
      STDERR.puts 'Lockfile exists' if $VERBOSE
      return
    end

    begin
      yield
    ensure
      unlock!
    end
  rescue Errno::EACCES
    # Kept from the previous implementation: permission errors are reported
    # the same way instead of propagating.
    STDERR.puts 'Lockfile exists' if $VERBOSE
  end

  # Returns the path (without extension) of a sitemap file inside the
  # document directory: "sitemap" when +name+ is nil, "sitemap_<name>"
  # otherwise. Non-String names are passed through +table_name+ first.
  def file_name(name=nil)
    name = table_name(name) unless (name.nil? || name.is_a?(String))
    prefix = name.nil? ? 'sitemap' : 'sitemap_'
    File.join(@options[:document_full], "#{prefix}#{name}")
  end

  # Returns the glob pattern matching every sitemap file in the document
  # directory.
  def dir_files
    File.join(@options[:document_full], "sitemap*.{xml,xml.gz}")
  end

  # Deletes every file matched by #dir_files.
  #
  # Returns self.
  def clean
    Dir[dir_files].each { |file| FileUtils.rm file }

    self
  end

  # TODO: Deprecate (move to private)
  #
  # Removes old files (unless options[:partial_update] is set), writes the
  # queued URLs and the sitemap index, then pings the search engines.
  #
  # Returns self.
  def generate(options={})
    clean unless options[:partial_update]

    add_urls

    generate_sitemap_index

    ping_search_engines

    self
  end

  # Writes every queued path, made absolute against :base_url, into sitemap
  # files. Duplicate entries are dropped first.
  #
  # Returns self.
  def add_urls
    return self if Array(@paths).empty?

    with_sitemap do |builder|
      @paths.uniq!
      @paths.each do |path, options|
        url = URI.join(@options[:base_url], path)
        builder.add_url! url, options
      end
    end

    self
  end

  # Create a sitemap index document
  #
  # files - paths of the sitemap files to list; defaults to everything
  #         matched by #dir_files. Index files themselves are skipped.
  #
  # Returns self.
  def generate_sitemap_index(files=nil)
    files ||= Dir[dir_files]

    with_sitemap({:name => 'index', :type => 'index'}) do |sitemap|
      files.each do |path|
        # Test the file name only; a directory called e.g. "index_site"
        # must not cause every sitemap to be skipped.
        next if File.basename(path) =~ /index/
        sitemap.add_url! url_for_sitemap(path), :last_modified => File.stat(path).mtime
      end
    end

    self
  end

  # Sends an HTTP GET announcing the most recently written sitemap file to
  # every search engine enabled through the :ping_* options. Does nothing
  # when no sitemap file has been recorded.
  def ping_search_engines
    return if @sitemap_files.empty?

    # Loaded lazily so the network libraries are only pulled in when needed.
    require 'net/http'
    require 'cgi'

    sitemap_uri = CGI.escape(url_for_sitemap(@sitemap_files.last))

    if @options[:ping_google]
      Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
    end

    if @options[:ping_yahoo]
      if @options[:yahoo_app_id]
        Net::HTTP.get(
          'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
            "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
        )
      else
        STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
      end
    end

    if @options[:ping_bing]
      Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
    end

    if @options[:ping_ask]
      Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
    end

    if @options[:ping_yandex]
      Net::HTTP.get('webmaster.yandex.ru', "/wmconsole/sitemap_list.xml?host=#{sitemap_uri}")
    end
  end

  private

  # Creates the lock file inside the document directory.
  #
  # CREAT|EXCL makes creation fail with Errno::EEXIST when the file is
  # already there. (Passing File::EXCL as the third argument next to a
  # string mode would be interpreted as permission bits, not as a flag.)
  def lock!(lock_file = 'generator.lock')
    lock_file = File.join(@options[:document_full], lock_file)
    # Only the file's existence matters, so the handle is closed right away.
    File.open(lock_file, File::WRONLY | File::CREAT | File::EXCL).close
  end

  # Removes the lock file created by #lock!.
  def unlock!(lock_file = 'generator.lock')
    lock_file = File.join(@options[:document_full], lock_file)
    FileUtils.rm lock_file
  end

  # Yields a builder (IndexBuilder when options[:type] is 'index', Builder
  # otherwise) configured from +options+, with gaps filled from the
  # instance options. The builder is always closed afterwards and the file
  # paths it reports are appended to @sitemap_files.
  def with_sitemap(options={})
    options[:filename] ||= file_name(options[:name])
    options[:type] ||= 'sitemap'
    options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
    # nil checks (not ||=) so an explicit false is respected.
    options[:gzip] = @options[:gzip] if options[:gzip].nil?
    options[:indent] ||= 2
    options[:partial_update] = @options[:partial_update] if options[:partial_update].nil?
    options[:start_part_id] ||= first_id_of_last_sitemap

    sitemap = if options[:type] == 'index'
      IndexBuilder.new(options)
    else
      Builder.new(options)
    end

    begin
      yield sitemap
    ensure
      sitemap.close!
      @sitemap_files.concat sitemap.filepaths!
    end
  end

  # Returns the first entry of +candidates+ that +model+ responds to, or nil.
  def pick_method(model, candidates)
    candidates.find { |candidate| model.respond_to? candidate }
  end

  # Returns the public URL of the sitemap file stored at +path+.
  def url_for_sitemap(path)
    File.join @options[:base_url], @options[:url_path], File.basename(path)
  end

end


# BigSitemap preconfigured for a Rails application: files are written to
# "<Rails.root>/public" unless the caller says otherwise.
class BigSitemapRails < BigSitemap
  # Same contract as BigSitemap.generate. Caller-supplied options override
  # the Rails-derived ones.
  #
  # Raises RuntimeError when Rails is not loaded.
  def self.generate(options={}, &block)
    raise 'No Rails Environment loaded' unless defined? Rails

    # Merge into this call's options rather than into the DEFAULTS constant,
    # which is shared with BigSitemap and every other subclass.
    # NOTE(review): default_url_options is not defined in this file -
    # confirm where it is provided.
    rails_defaults = {:document_root => "#{Rails.root}/public", :url_options => default_url_options}
    super(rails_defaults.merge(options), &block)
  end
end


# BigSitemap preconfigured for a Merb application: files are written to
# "<Merb.root>/public" unless the caller says otherwise.
class BigSitemapMerb < BigSitemap
  # Same contract as BigSitemap.generate. Caller-supplied options override
  # the Merb-derived document root.
  #
  # Raises RuntimeError when Merb is not loaded.
  def self.generate(options={}, &block)
    raise 'No Merb Environment loaded' unless defined? ::Merb
    require 'extlib'

    # Merge into this call's options rather than into the DEFAULTS constant,
    # which is shared with BigSitemap and every other subclass.
    merb_defaults = {:document_root => "#{Merb.root}/public"}
    super(merb_defaults.merge(options), &block)
  end
end
Something went wrong with that request. Please try again.