diff --git a/MIT-LICENSE b/MIT-LICENSE new file mode 100644 index 00000000..9376605b --- /dev/null +++ b/MIT-LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2009 [name of plugin creator] + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..736b1d78 --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +SitemapGenerator +================ + +This plugin enables Google Sitemaps to be easily generated for a Rails site as a rake task. (and it _actually_ works) + +Sitemaps are generally not required to be dynamic, so if you need to refresh your Sitemaps regularly you can set the rake task up as a cron job. + +Raison d'être +------- + +I was dissatisfied with any of the current Rails sitemap plugins that I found. So I decided I would write my own. ;) + +I say "it actually works" because in the process of creating this plugin I tried about 6 different plugins, none of which (IMHO) worked in a natural 'railsy' way. 
Your mileage may vary of course. + +Installation +======= + +1. Install plugin as normal + + ./script/plugin install git://github.com/adamsalter/sitemap_generator-plugin.git + +2. Installation will create a 'config/sitemap.rb' file which will contain your logic for generation of the Sitemap files. Explanation of syntax for this file is contained in the file itself. (If you want to recreate this file manually run `rake sitemap:install`) + +3. Run `rake sitemap:refresh` as needed to create sitemap files. This will also ping all the major search engines. + +4. Add the following to your robots.txt file. + + Sitemap: <sitemap_index_location> + +The <sitemap_index_location> should be the complete URL to the Sitemap index, such as: http://www.example.org/sitemap_index.xml.gz + +Notes +======= + +- only tested/working on Rails 2.3.2, no guarantees made for any other versions of Rails. +- currently only supports one sitemap index file, which can contain 50,000 sitemap files which can each contain 50,000 urls, so plugin only supports up to 2,500,000,000 urls. I personally have no need of support for more urls, but plugin could be improved to support this. + +Copyright (c) 2009 Adam @ [Codebright.net][cb], released under the MIT license + +[cb]:http://codebright.net \ No newline at end of file diff --git a/Rakefile b/Rakefile new file mode 100644 index 00000000..31967db0 --- /dev/null +++ b/Rakefile @@ -0,0 +1,23 @@ +require 'rake' +require 'rake/testtask' +require 'rake/rdoctask' + +desc 'Default: run unit tests.' +task :default => :test + +desc 'Test the sitemap_generator plugin.' +Rake::TestTask.new(:test) do |t| + t.libs << 'lib' + t.libs << 'test' + t.pattern = 'test/**/*_test.rb' + t.verbose = true +end + +desc 'Generate documentation for the sitemap_generator plugin.' 
+Rake::RDocTask.new(:rdoc) do |rdoc| + rdoc.rdoc_dir = 'rdoc' + rdoc.title = 'SitemapGenerator' + rdoc.options << '--line-numbers' << '--inline-source' + rdoc.rdoc_files.include('README.md') + rdoc.rdoc_files.include('lib/**/*.rb') +end diff --git a/init.rb b/init.rb new file mode 100644 index 00000000..28772093 --- /dev/null +++ b/init.rb @@ -0,0 +1,2 @@ +# Include hook code here + diff --git a/install.rb b/install.rb new file mode 100644 index 00000000..c1409a5d --- /dev/null +++ b/install.rb @@ -0,0 +1,8 @@ +# Install hook code here + +# Copy templates/sitemap.rb to config/sitemap.rb (an existing config file is never overwritten) +require 'fileutils' +current_dir = File.dirname(__FILE__) +sitemap_template = File.join(current_dir, 'templates/sitemap.rb') +new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb') +FileUtils.cp(sitemap_template, new_sitemap) unless File.exist?(new_sitemap) \ No newline at end of file diff --git a/lib/sitemap_plugin.rb b/lib/sitemap_plugin.rb new file mode 100644 index 00000000..3b583a2a --- /dev/null +++ b/lib/sitemap_plugin.rb @@ -0,0 +1,9 @@ +require 'sitemap_plugin/mapper' +require 'sitemap_plugin/link' +require 'sitemap_plugin/link_set' +require 'sitemap_plugin/helper' + +module SitemapPlugin + Sitemap = LinkSet.new +end + \ No newline at end of file diff --git a/lib/sitemap_plugin/helper.rb b/lib/sitemap_plugin/helper.rb new file mode 100644 index 00000000..1df1b002 --- /dev/null +++ b/lib/sitemap_plugin/helper.rb @@ -0,0 +1,38 @@ +module SitemapPlugin + module Helper + def load_sitemap_rb + controller = ApplicationController.new + controller.request = ActionController::TestRequest.new + controller.params = {} + controller.send(:initialize_current_url) + b = controller.send(:binding) + sitemap_mapper_file = File.join(RAILS_ROOT, 'config/sitemap.rb') + eval(File.read(sitemap_mapper_file), b, sitemap_mapper_file) # File.read closes the handle; the filename gives usable backtraces + end + + def url_with_hostname(path) + URI.join(Sitemap.default_host, path).to_s + end + + def w3c_date(date) + date.utc.strftime("%Y-%m-%dT%H:%M:%S+00:00") + end + + def 
ping_search_engines(sitemap_index) + index_location = CGI.escape(url_with_hostname(sitemap_index)) + # engines list from http://en.wikipedia.org/wiki/Sitemap_index + {:google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{index_location}", + :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{index_location}", + :ask => "http://submissions.ask.com/ping?sitemap=#{index_location}", + :msn => "http://webmaster.live.com/ping.aspx?siteMap=#{index_location}", + :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{index_location}"}.each do |engine, link| + begin + open(link) + puts "Successful ping of #{engine.to_s.titleize}" + rescue StandardError => e + puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}" + end + end + end + end +end \ No newline at end of file diff --git a/lib/sitemap_plugin/link.rb b/lib/sitemap_plugin/link.rb new file mode 100644 index 00000000..9cf56bb0 --- /dev/null +++ b/lib/sitemap_plugin/link.rb @@ -0,0 +1,20 @@ + +module SitemapPlugin + class Link + attr_accessor :path, :priority, :changefreq, :lastmod, :host + + def initialize(path, options = {}) + options.assert_valid_keys(:priority, :changefreq, :lastmod, :host) + options = options.reverse_merge(:priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now) # non-destructive: the caller's hash is left untouched; :host is resolved lazily in #loc + @path = path + @priority = options[:priority] + @changefreq = options[:changefreq] + @lastmod = options[:lastmod] + @host = options[:host] # nil means: fall back to Sitemap.default_host when the URL is built + end + + def loc + URI.join(@host || Sitemap.default_host, @path).to_s # default host is read here because links can be created before it is configured + end + end +end diff --git a/lib/sitemap_plugin/link_set.rb b/lib/sitemap_plugin/link_set.rb new file mode 100644 index 00000000..3e4599b2 --- /dev/null +++ b/lib/sitemap_plugin/link_set.rb @@ -0,0 +1,20 @@ +module SitemapPlugin + class LinkSet + attr_accessor :default_host, :links + + def initialize + @links = [] + # Add default links + @links << Link.new('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0) + @links << 
Link.new('/sitemap_index.xml.gz', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0) + end + + def add_links + yield Mapper.new(self) + end + + def add_link(link) + @links << link + end + end +end \ No newline at end of file diff --git a/lib/sitemap_plugin/mapper.rb b/lib/sitemap_plugin/mapper.rb new file mode 100644 index 00000000..6c784981 --- /dev/null +++ b/lib/sitemap_plugin/mapper.rb @@ -0,0 +1,16 @@ + +module SitemapPlugin + # Mapper instances are used to build links and add them to a LinkSet. + # The object passed to the add_links block in config/sitemap.rb is a Mapper instance. + class Mapper + attr_accessor :set + + def initialize(set) + @set = set + end + + def add(loc, options = {}) + set.add_link Link.new(loc, options) + end + end +end \ No newline at end of file diff --git a/tasks/sitemap_generator_tasks.rake b/tasks/sitemap_generator_tasks.rake new file mode 100644 index 00000000..8564ee50 --- /dev/null +++ b/tasks/sitemap_generator_tasks.rake @@ -0,0 +1,50 @@ +require 'zlib' + +namespace :sitemap do + + desc "install a default config/sitemap.rb file" + task :install do + load File.expand_path(File.join(File.dirname(__FILE__), "..", "install.rb")) + end + + desc "Regenerate Google Sitemap files in public/ directory" + task :refresh => :environment do + include SitemapPlugin::Helper + + # update links from config/sitemap.rb + load_sitemap_rb + + raise(ArgumentError, "Default hostname not defined") unless SitemapPlugin::Sitemap.default_host.present? 
+ + links_grps = SitemapPlugin::Sitemap.links.in_groups_of(50000, false) + + # render individual sitemaps + sitemap_files = [] + xml_sitemap_template = File.join(File.dirname(__FILE__), '../templates/xml_sitemap.builder') + links_grps.each_with_index do |links, index| + buffer = '' + xml = Builder::XmlMarkup.new(:target=>buffer) + eval(File.read(xml_sitemap_template), binding, xml_sitemap_template) # File.read closes the handle; the filename gives usable backtraces + filename = File.join(RAILS_ROOT, "public/sitemap#{index+1}.xml.gz") + Zlib::GzipWriter.open(filename) do |gz| + gz.write buffer + end + puts "+ #{filename}" + sitemap_files << filename + end + + # render index + sitemap_index_template = File.join(File.dirname(__FILE__), '../templates/sitemap_index.builder') + buffer = '' + xml = Builder::XmlMarkup.new(:target=>buffer) + eval(File.read(sitemap_index_template), binding, sitemap_index_template) # File.read closes the handle; the filename gives usable backtraces + filename = File.join(RAILS_ROOT, "public/sitemap_index.xml.gz") + Zlib::GzipWriter.open(filename) do |gz| + gz.write buffer + end + puts "+ #{filename}" + + ping_search_engines("sitemap_index.xml.gz") + + end +end \ No newline at end of file diff --git a/templates/sitemap.rb b/templates/sitemap.rb new file mode 100644 index 00000000..97c33414 --- /dev/null +++ b/templates/sitemap.rb @@ -0,0 +1,19 @@ + +# Set the host name for URL creation +SitemapPlugin::Sitemap.default_host = "http://www.example.com" + +# Put links creation logic here +# (the root path '/' and sitemap files are added automatically) +SitemapPlugin::Sitemap.add_links do |sitemap| + # add '/articles' + # default values are added if you don't specify anything + sitemap.add articles_path # :priority => 0.5, :changefreq => 'weekly', :lastmod => Time.now, :host => default_host + + # add all articles + Article.find(:all).each do |a| + sitemap.add article_path(a), :lastmod => a.updated_at + end + + # add merchant path + sitemap.add '/purchase', :host => "https://www.example.com" +end \ No newline at end of file diff --git a/templates/sitemap_index.builder b/templates/sitemap_index.builder new file mode 100644 index 
00000000..ad886cfa --- /dev/null +++ b/templates/sitemap_index.builder @@ -0,0 +1,21 @@ +# <?xml version="1.0" encoding="UTF-8"?> +# <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> +# <sitemap> +# <loc>http://www.example.com/sitemap1.xml.gz</loc> +# <lastmod>2004-10-01T18:23:17+00:00</lastmod> +# </sitemap> +# <sitemap> +# <loc>http://www.example.com/sitemap2.xml.gz</loc> +# <lastmod>2005-01-01</lastmod> +# </sitemap> +# </sitemapindex> + +xml.instruct! :xml, :encoding => 'UTF-8' +xml.sitemapindex "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do + sitemap_files.each do |file| + xml.sitemap do + xml.loc url_with_hostname(File.basename(file)) # file is an absolute path under public/; only its basename belongs in the URL + xml.lastmod w3c_date(File.mtime(file)) + end + end +end \ No newline at end of file diff --git a/templates/xml_sitemap.builder b/templates/xml_sitemap.builder new file mode 100644 index 00000000..12d2595f --- /dev/null +++ b/templates/xml_sitemap.builder @@ -0,0 +1,17 @@ +xml.instruct! :xml, :encoding => 'UTF-8' +xml.instruct! :"xml-stylesheet", :type=>"text/xsl", :href=>"sitemap.xsl" + +xml.urlset "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance", + "xsi:schemaLocation" => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd", + "xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9" do + + links.each do |link| + xml.url do + xml.loc link.loc + xml.lastmod w3c_date(link.lastmod) + xml.changefreq link.changefreq + xml.priority link.priority + end + end + +end \ No newline at end of file diff --git a/test/sitemap_generator_test.rb b/test/sitemap_generator_test.rb new file mode 100644 index 00000000..99a282c0 --- /dev/null +++ b/test/sitemap_generator_test.rb @@ -0,0 +1,8 @@ +require 'test_helper' + +class SitemapGeneratorTest < ActiveSupport::TestCase + # Replace this with your real tests. 
+ test "the truth" do + assert true + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb new file mode 100644 index 00000000..cf148b8b --- /dev/null +++ b/test/test_helper.rb @@ -0,0 +1,3 @@ +require 'rubygems' +require 'active_support' +require 'active_support/test_case' \ No newline at end of file diff --git a/uninstall.rb b/uninstall.rb new file mode 100644 index 00000000..ffe744bf --- /dev/null +++ b/uninstall.rb @@ -0,0 +1,4 @@ +# Uninstall hook code here: removes the config/sitemap.rb created by install.rb + +new_sitemap = File.join(RAILS_ROOT, 'config/sitemap.rb') +File.delete(new_sitemap) if File.exist?(new_sitemap) \ No newline at end of file