Skip to content
Browse files

adding a better robots filtering mechanism

  • Loading branch information...
1 parent 167ed02 commit 3295c24bf1c3b15e6b0a33092264ec86792ae45c erik committed
View
2 app/controllers/application_controller.rb
@@ -4,8 +4,6 @@
class ApplicationController < ActionController::Base
helper :all # include all helpers, all the time
#protect_from_forgery # See ActionController::RequestForgeryProtection for details
-
- BOT_FILTER = /(?:Googlebot|Slurp|Apache|msnbot|wget|libwww|nutch|ia_archiver|heretrix|cuil|google|yandex)/i
after_filter :minify_html, :unless => Proc.new { Rails.env.development? }
after_filter :save_clickstream, :if => :save_clickstream?
View
2 app/models/robot.rb
@@ -0,0 +1,2 @@
+# ActiveRecord model for a known web robot / crawler.
+# Defines no behaviour of its own; rows come from the `robots` table
+# (a single string column, `name`, plus timestamps).
+class Robot < ActiveRecord::Base
+end
View
3 config/initializers/config.rb
@@ -24,4 +24,5 @@
SPHINX_SEARCH = SETTINGS[:search] && !SETTINGS[:search].blank? && SETTINGS[:search]=='sphinx'
-DEFAULT_FILTER = 'recency'
+DEFAULT_FILTER = 'recency'
+# Case-insensitive pattern that matches any robot name stored in the robots table.
+#
+# Regexp.union escapes each name (so "yahoo!" or "envolk[its]spider" match
+# literally) and joins them with "|". When there are no usable names it yields
+# a pattern that never matches, so an empty table does not flag every request.
+# The table_exists? guard lets the app boot (e.g. for `rake db:migrate`) before
+# the robots migration has been run.
+# NOTE: built once at boot; rows added later take effect after a restart.
+bot_names = Robot.table_exists? ? Robot.all.map(&:name).reject(&:blank?).uniq : []
+BOT_FILTER = Regexp.new(Regexp.union(*bot_names).source, Regexp::IGNORECASE)
View
159 db/migrate/20111227173221_create_robots.rb
@@ -0,0 +1,159 @@
+class CreateRobots < ActiveRecord::Migration
+ def self.up
+ create_table :robots do |t|
+ t.string :name
+ t.timestamps
+ end
+
+ Robot.create({:name => 'abachobot'})
+ Robot.create({:name => 'accoona-ai-agent'})
+ Robot.create({:name => 'anyapexbot'})
+ Robot.create({:name => 'arachmo'})
+ Robot.create({:name => 'b-l-i-t-z-b-o-t'})
+ Robot.create({:name => 'baiduspider'})
+ Robot.create({:name => 'becomebot'})
+ Robot.create({:name => 'bimbot'})
+ Robot.create({:name => 'blitzbot'})
+ Robot.create({:name => 'boitho.com-dc'})
+ Robot.create({:name => 'boitho.com-robot'})
+ Robot.create({:name => 'bot'})
+ Robot.create({:name => 'btbot'})
+ Robot.create({:name => 'cerberian'})
+ Robot.create({:name => 'drtrs'})
+ Robot.create({:name => 'converacrawler'})
+ Robot.create({:name => 'cosmos'})
+ Robot.create({:name => 'dataparksearch'})
+ Robot.create({:name => 'diamondbot'})
+ Robot.create({:name => 'discobot'})
+ Robot.create({:name => 'emeraldshield.com'})
+ Robot.create({:name => 'webbot'})
+ Robot.create({:name => 'envolk[its]spider'})
+ Robot.create({:name => 'esperanzabot'})
+ Robot.create({:name => 'exabot'})
+ Robot.create({:name => 'fast'})
+ Robot.create({:name => 'enterprise'})
+ Robot.create({:name => 'crawler'})
+ Robot.create({:name => 'fast-webcrawler'})
+ Robot.create({:name => 'fdse'})
+ Robot.create({:name => 'robot'})
+ Robot.create({:name => 'findlinks'})
+ Robot.create({:name => 'furlbot'})
+ Robot.create({:name => 'fyberspider'})
+ Robot.create({:name => 'gcrawler'})
+ Robot.create({:name => 'gaisbot'})
+ Robot.create({:name => 'geniebot'})
+ Robot.create({:name => 'gigabot'})
+ Robot.create({:name => 'girafabot'})
+ Robot.create({:name => 'googlebot'})
+ Robot.create({:name => 'googlebot-image'})
+ Robot.create({:name => 'hl_ftien_spider'})
+ Robot.create({:name => 'htdig'})
+ Robot.create({:name => 'ia_archiver'})
+ Robot.create({:name => 'ichiro'})
+ Robot.create({:name => 'irlbot'})
+ Robot.create({:name => 'issuecrawler'})
+ Robot.create({:name => 'jyxobot'})
+ Robot.create({:name => 'lapozzbot'})
+ Robot.create({:name => 'larbin'})
+ Robot.create({:name => 'linkwalker'})
+ Robot.create({:name => 'lmspider'})
+ Robot.create({:name => 'lwp-trivial'})
+ Robot.create({:name => 'mabontland'})
+ Robot.create({:name => 'mediapartners-google'})
+ Robot.create({:name => 'mjbot'})
+ Robot.create({:name => 'mnogosearch'})
+ Robot.create({:name => 'mogimogi'})
+ Robot.create({:name => 'mojeekbot'})
+ Robot.create({:name => 'morning'})
+ Robot.create({:name => 'paper'})
+ Robot.create({:name => 'msnbot'})
+ Robot.create({:name => 'msrbot'})
+ Robot.create({:name => 'mvaclient'})
+ Robot.create({:name => 'netresearchserver'})
+ Robot.create({:name => 'ng-search'})
+ Robot.create({:name => 'nicebot'})
+ Robot.create({:name => 'noxtrumbot'})
+ Robot.create({:name => 'nusearch'})
+ Robot.create({:name => 'spider'})
+ Robot.create({:name => 'nutchcvs'})
+ Robot.create({:name => 'obot'})
+ Robot.create({:name => 'oegp'})
+ Robot.create({:name => 'omniexplorer_bot'})
+ Robot.create({:name => 'oozbot'})
+ Robot.create({:name => 'orbiter'})
+ Robot.create({:name => 'pagebiteshyperbot'})
+ Robot.create({:name => 'polybot'})
+ Robot.create({:name => 'pompos'})
+ Robot.create({:name => 'psbot'})
+ Robot.create({:name => 'pycurl'})
+ Robot.create({:name => 'rampybot'})
+ Robot.create({:name => 'rufusbot'})
+ Robot.create({:name => 'sandcrawler'})
+ Robot.create({:name => 'sbider'})
+ Robot.create({:name => 'scoutjet'})
+ Robot.create({:name => 'scrubby'})
+ Robot.create({:name => 'searchsight'})
+ Robot.create({:name => 'seekbot'})
+ Robot.create({:name => 'semanticdiscovery'})
+ Robot.create({:name => 'sensis'})
+ Robot.create({:name => 'web'})
+ Robot.create({:name => 'crawler'})
+ Robot.create({:name => 'seochat::bot'})
+ Robot.create({:name => 'shim-crawler'})
+ Robot.create({:name => 'shopwiki'})
+ Robot.create({:name => 'shoula'})
+ Robot.create({:name => 'robot'})
+ Robot.create({:name => 'silk'})
+ Robot.create({:name => 'snappy'})
+ Robot.create({:name => 'sogou'})
+ Robot.create({:name => 'spider'})
+ Robot.create({:name => 'speedy'})
+ Robot.create({:name => 'spider'})
+ Robot.create({:name => 'sqworm'})
+ Robot.create({:name => 'stackrambler'})
+ Robot.create({:name => 'surveybot'})
+ Robot.create({:name => 'synoobot'})
+ Robot.create({:name => 'teoma'})
+ Robot.create({:name => 'terrawizbot'})
+ Robot.create({:name => 'thesubot'})
+ Robot.create({:name => 'thumbnail.cz'})
+ Robot.create({:name => 'robot'})
+ Robot.create({:name => 'tineye'})
+ Robot.create({:name => 'turnitinbot'})
+ Robot.create({:name => 'updated'})
+ Robot.create({:name => 'vagabondo'})
+ Robot.create({:name => 'voilabot'})
+ Robot.create({:name => 'vortex'})
+ Robot.create({:name => 'voyager'})
+ Robot.create({:name => 'vyu'})
+ Robot.create({:name => 'webcollage'})
+ Robot.create({:name => 'websquash.com'})
+ Robot.create({:name => 'wf'})
+ Robot.create({:name => 'wofindeich'})
+ Robot.create({:name => 'robot'})
+ Robot.create({:name => 'xaldon_webspider'})
+ Robot.create({:name => 'yacy'})
+ Robot.create({:name => 'yahoo!'})
+ Robot.create({:name => 'slurp'})
+ Robot.create({:name => 'yahoo!'})
+ Robot.create({:name => 'slurp'})
+ Robot.create({:name => 'china'})
+ Robot.create({:name => 'yahooseeker'})
+ Robot.create({:name => 'yahooseeker-testing'})
+ Robot.create({:name => 'yooglifetchagent'})
+ Robot.create({:name => 'zao'})
+ Robot.create({:name => 'zealbot'})
+ Robot.create({:name => 'zspider'})
+ Robot.create({:name => 'zyborg'})
+ Robot.create({:name => 'Apache'})
+ Robot.create({:name => 'wget'})
+ Robot.create({:name => 'libwww'})
+ Robot.create({:name => 'nutch'})
+ Robot.create({:name => 'cuil'})
+ Robot.create({:name => 'libcurl'})
+ end
+
+ def self.down
+ drop_table :robots
+ end
+end
View
11 test/fixtures/robots.yml
@@ -0,0 +1,11 @@
+# Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
+
+# This model initially had no columns defined. If you add columns to the
+# model remove the '{}' from the fixture names and add the columns immediately
+# below each fixture, per the syntax in the comments below
+#
+one: {}
+# column: value
+#
+two: {}
+# column: value
View
8 test/unit/robot_test.rb
@@ -0,0 +1,8 @@
+require 'test_helper'
+
+class RobotTest < ActiveSupport::TestCase
+ # Replace this with your real tests.
+ test "the truth" do
+ assert true
+ end
+end

0 comments on commit 3295c24

Please sign in to comment.
Something went wrong with that request. Please try again.