Permalink
Browse files

Add bot-detector code from KISSMetrics

  • Loading branch information...
1 parent 8a410eb commit 790f5d1d992a0bed8f2263c2b8f1fe9d04695139 @jkrall committed May 4, 2010
Showing with 96 additions and 1 deletion.
  1. +4 −0 lib/analytical.rb
  2. +46 −0 lib/analytical/bot_detector.rb
  3. +24 −0 spec/analytical/bot_detector_spec.rb
  4. +22 −1 spec/analytical_spec.rb
View
@@ -7,6 +7,7 @@ module Analytical
# any method placed here will apply to ActionController::Base
def analytical(options={})
send :include, InstanceMethods
+ send :include, Analytical::BotDetector
send :helper_method, :analytical
send :cattr_accessor, :analytical_options
@@ -38,6 +39,9 @@ def analytical
if options[:disable_if].call(self)
options[:modules] = options[:development_modules]
end
+ if analytical_is_robot?(request.user_agent)
+ options[:modules] = []
+ end
Analytical::Api.new options
end
end
@@ -0,0 +1,46 @@
module Analytical
  # Heuristic user-agent sniffing, mixed into controllers so that analytics
  # modules can be switched off for crawlers and other non-browser clients.
  module BotDetector
    # A user agent that *starts with* one of these text-browser names is
    # never considered a bot, whatever else the string contains.
    WHITELISTED_PREFIXES = %w(w3m dillo links elinks lynx).freeze

    # A user agent containing any of these substrings is considered a bot.
    BOT_INDICATORS = %w(
      bot spider search jeeves crawl seek heritrix slurp thumbnails capture
      ferret webinator scan retriever accelerator upload digg extractor grub
      scrub
    ).freeze

    # A user agent must contain at least one of these substrings to be
    # treated as a real browser.
    BROWSER_INDICATORS = %w(mozilla browser iphone lynx mobile opera icab).freeze

    # Decides whether a request's User-Agent header looks like a robot.
    #
    # The checks run in this order (all comparisons are case-insensitive):
    #
    # 1. A missing or whitespace-only user agent is NOT a robot.
    # 2. Anything starting with a WHITELISTED_PREFIXES word is NOT a robot.
    # 3. Anything containing a BOT_INDICATORS word IS a robot.
    # 4. Anything lacking every BROWSER_INDICATORS word IS a robot.
    # 5. A "mozilla" user agent IS a robot unless it carries both a
    #    parenthesised section and version information ("mozilla/<digits>").
    #
    # @param user_agent [String, nil] raw User-Agent header value
    # @return [Boolean] true when the user agent looks like a robot
    def analytical_is_robot?(user_agent)
      return false unless user_agent

      agent = user_agent.to_s.downcase
      # Whitespace-only strings count as "no user agent". This is checked
      # with plain Ruby so the mixin does not depend on ActiveSupport's
      # String#blank?.
      return false if agent !~ /[^[:space:]]/

      return false if WHITELISTED_PREFIXES.any? { |word| agent.start_with?(word) }
      return true if BOT_INDICATORS.any? { |word| agent.include?(word) }
      return true unless BROWSER_INDICATORS.any? { |word| agent.include?(word) }

      # Genuine Mozilla-compatible browsers always send a version number and
      # a parenthesised platform section; bare "Mozilla" strings are fakes.
      if agent.include?('mozilla')
        return true unless agent.include?('(')
        return true if agent !~ %r{mozilla/\d+}
      end

      false
    end
  end
end
@@ -0,0 +1,24 @@
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

# Covers every decision branch of Analytical::BotDetector#analytical_is_robot?:
# blank input, the text-browser whitelist, bot indicators, the missing
# browser-indicator rule, and the Mozilla version check.
describe "Analytical::BotDetector" do
  class DummyForBotDetector
    include Analytical::BotDetector
  end
  before(:each) do
    @d = DummyForBotDetector.new
  end

  describe 'with nil user_agent' do
    it 'should return false' do
      @d.analytical_is_robot?(nil).should be_false
    end
  end

  describe 'with empty user_agent' do
    it 'should return false' do
      @d.analytical_is_robot?('').should be_false
    end
  end

  describe 'with a regular browser user_agent' do
    it 'should return false' do
      @d.analytical_is_robot?('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 GTB7.0').should be_false
    end
  end

  describe 'with a user_agent containing a bot indicator' do
    it 'should return true' do
      @d.analytical_is_robot?('Googlebot/2.1 (+http://www.google.com/bot.html)').should be_true
    end
  end

  describe 'with a whitelisted text browser' do
    it 'should return false' do
      @d.analytical_is_robot?('Lynx/2.8.5rel.1 libwww-FM/2.14').should be_false
    end

    # The whitelist is checked before the bot indicators, so it wins.
    it 'should return false even if a bot indicator is present' do
      @d.analytical_is_robot?('Links (2.1; Linux) searchbot').should be_false
    end
  end

  describe 'with a user_agent lacking any browser indicator' do
    it 'should return true' do
      @d.analytical_is_robot?('curl/7.19.7').should be_true
    end
  end

  describe 'with a mozilla user_agent missing version information' do
    it 'should return true when there is no parenthesised section' do
      @d.analytical_is_robot?('Mozilla/4.0').should be_true
    end

    it 'should return true when there is no version number' do
      @d.analytical_is_robot?('Mozilla (compatible)').should be_true
    end
  end
end
View
@@ -11,8 +11,12 @@
describe 'on initialization' do
class DummyForInit
extend Analytical
- def request; OpenStruct.new(:'ssl?'=>true); end
def self.helper_method(*a); end
+ def request
+ Spec::Mocks::Mock.new 'request',
+ :'ssl?'=>true,
+ :user_agent=>'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 GTB7.0'
+ end
end
it 'should have the default options' do
@@ -23,6 +27,23 @@ def self.helper_method(*a); end
d.options[:disable_if].call.should be_false
end
+ it 'should use the supplied options' do
+ DummyForInit.analytical :modules=>[:google]
+ d = DummyForInit.new.analytical
+ d.options[:modules].should == [:google]
+ d.options[:development_modules].should == [:console]
+ d.options[:disable_if].call.should be_false
+ end
+
+ describe 'with a robot request' do
+ it 'should set the modules to []' do
+ DummyForInit.analytical
+ d = DummyForInit.new
+ d.stub!(:'analytical_is_robot?').and_return(true)
+ d.analytical.options[:modules].should == []
+ end
+ end
+
it 'should open the initialization file' do
File.should_receive(:'exists?').with("#{RAILS_ROOT}/config/analytical.yml").and_return(true)
DummyForInit.analytical

0 comments on commit 790f5d1

Please sign in to comment.