Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

first commit

  • Loading branch information...
commit 755bd937da5c1cf336969987eaeb9367a8d0de63 0 parents
John McAliley johnmcaliley authored
Showing with 12,760 additions and 0 deletions.
  1. +27 −0 Gemfile
  2. +20 −0 LICENSE.txt
  3. +95 −0 README.rdoc
  4. +87 −0 Rakefile
  5. +1 −0  VERSION
  6. +60 −0 app/controllers/impressionist_controller.rb
  7. +3 −0  app/models/impression.rb
  8. +1,462 −0 app/models/impressionist/bots.rb
  9. +22 −0 app/models/impressionist/impressionable.rb
  10. +2 −0  config/routes.rb
  11. +109 −0 impressionist.gemspec
  12. +20 −0 lib/generators/impressionist/impressionist_generator.rb
  13. +20 −0 lib/generators/impressionist/templates/create_impressions_table.rb
  14. +5 −0 lib/impressionist.rb
  15. +18 −0 lib/impressionist/bots.rb
  16. +18 −0 lib/impressionist/engine.rb
  17. 0  lib/impressionist/railties/tasks.rake
  18. +4 −0 test_app/.gitignore
  19. +1 −0  test_app/.rspec
  20. +24 −0 test_app/Gemfile
  21. +256 −0 test_app/README
  22. +7 −0 test_app/Rakefile
  23. +8 −0 test_app/app/controllers/application_controller.rb
  24. +9 −0 test_app/app/controllers/articles_controller.rb
  25. +14 −0 test_app/app/controllers/posts_controller.rb
  26. +11 −0 test_app/app/controllers/widgets_controller.rb
  27. +2 −0  test_app/app/helpers/application_helper.rb
  28. +3 −0  test_app/app/models/article.rb
  29. +2 −0  test_app/app/views/articles/index.html.erb
  30. 0  test_app/app/views/articles/show.html.erb
  31. +14 −0 test_app/app/views/layouts/application.html.erb
  32. 0  test_app/app/views/posts/edit.html.erb
  33. 0  test_app/app/views/posts/index.html.erb
  34. 0  test_app/app/views/posts/show.html.erb
  35. 0  test_app/app/views/widgets/index.html.erb
  36. 0  test_app/app/views/widgets/new.html.erb
  37. 0  test_app/app/views/widgets/show.html.erb
  38. +4 −0 test_app/config.ru
  39. +42 −0 test_app/config/application.rb
  40. +13 −0 test_app/config/boot.rb
  41. +8 −0 test_app/config/cucumber.yml
  42. +25 −0 test_app/config/database.yml
  43. +5 −0 test_app/config/environment.rb
  44. +26 −0 test_app/config/environments/development.rb
  45. +49 −0 test_app/config/environments/production.rb
  46. +35 −0 test_app/config/environments/test.rb
  47. +7 −0 test_app/config/initializers/backtrace_silencers.rb
  48. +10 −0 test_app/config/initializers/inflections.rb
  49. +5 −0 test_app/config/initializers/mime_types.rb
  50. +7 −0 test_app/config/initializers/secret_token.rb
  51. +8 −0 test_app/config/initializers/session_store.rb
  52. +5 −0 test_app/config/locales/en.yml
  53. +3 −0  test_app/config/routes.rb
  54. +13 −0 test_app/db/migrate/20110201153144_create_articles.rb
  55. +20 −0 test_app/db/migrate/20110201164012_create_impressions_table.rb
  56. +35 −0 test_app/db/schema.rb
  57. +7 −0 test_app/db/seeds.rb
  58. +219 −0 test_app/features/step_definitions/web_steps.rb
  59. +67 −0 test_app/features/support/env.rb
  60. +33 −0 test_app/features/support/paths.rb
  61. 0  test_app/lib/tasks/.gitkeep
  62. +53 −0 test_app/lib/tasks/cucumber.rake
  63. +26 −0 test_app/public/404.html
  64. +26 −0 test_app/public/422.html
  65. +26 −0 test_app/public/500.html
  66. 0  test_app/public/favicon.ico
  67. BIN  test_app/public/images/rails.png
  68. +239 −0 test_app/public/index.html
  69. +2 −0  test_app/public/javascripts/application.js
  70. +965 −0 test_app/public/javascripts/controls.js
  71. +974 −0 test_app/public/javascripts/dragdrop.js
  72. +1,123 −0 test_app/public/javascripts/effects.js
  73. +6,001 −0 test_app/public/javascripts/prototype.js
  74. +175 −0 test_app/public/javascripts/rails.js
  75. +5 −0 test_app/public/robots.txt
  76. 0  test_app/public/stylesheets/.gitkeep
  77. +10 −0 test_app/script/cucumber
  78. +6 −0 test_app/script/rails
  79. +44 −0 test_app/spec/controllers/controller_spec.rb
  80. +3 −0  test_app/spec/fixtures/articles.yml
  81. +26 −0 test_app/spec/fixtures/impressions.yml
  82. +16 −0 test_app/spec/initializers_spec.rb
  83. +43 −0 test_app/spec/models/model_spec.rb
  84. +27 −0 test_app/spec/spec_helper.rb
  85. 0  test_app/spec/view_spec.rb
  86. 0  test_app/vendor/plugins/.gitkeep
27 Gemfile
@@ -0,0 +1,27 @@
+# Gem source and development-time dependencies.
+source "http://rubygems.org"
+
+# Always declared for the development group.
+group :development do
+ gem "shoulda", ">= 0"
+ gem "bundler", "~> 1.0.0"
+ gem "jeweler", "~> 1.5.1"
+ gem "rcov", ">= 0"
+end
+
+# Extra development gems, declared only when the MY_BUNDLE_ENV environment
+# variable is set to "dev".
+if ENV['MY_BUNDLE_ENV'] == "dev"
+ group :development do
+ gem 'ZenTest'
+ gem 'autotest'
+ gem 'systemu'
+ gem "rspec"
+ gem "rspec-rails"
+ gem "mongrel", "1.2.0.pre2"
+ gem 'capybara'
+ gem 'database_cleaner'
+ gem 'cucumber-rails'
+ gem 'cucumber'
+ gem 'spork'
+ gem 'launchy'
+ gem 'autotest-notification'
+ gem 'httpclient'
+ end
+end
20 LICENSE.txt
@@ -0,0 +1,20 @@
+Copyright (c) 2011 cowboycoded
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
95 README.rdoc
@@ -0,0 +1,95 @@
+= impressionist
+
+A lightweight plugin that logs impressions per action or manually per model
+
+== I would not call this a stable plugin yet, although I have been running it in prod with no problems. Use at your own risk ;-)
+
+== What does this thing do?
+
+Logs an impression... and I use that term loosely. It can log page impressions (technically action impressions), but it is not limited to that. You can log impressions multiple times per request.
+And you can also attach it to a model. The goal of this project is to provide customizable web stats that are immediately accessible in your application as opposed to using G Analytics and pulling data using their API.
+You can attach custom messages to impressions and log multiple impressions per request. No reporting yet.. this thingy just creates the data.
+
+== Which versions of Rails and Ruby is this compatible with?
+
+Rails 3.0.3 and Ruby 1.9.2 - Sorry, but you need to upgrade if you are using Rails 2. You know you want to anyways.. all the cool kids are doing it ;-)
+
+== Installation
+
+Add it to your Gemfile
+
+ gem 'impressionist', :git => 'git@github.com:cowboycoded/impressionist.git'
+
+Install with Bundler
+
+ bundle install
+
+Generate the impressions table migration
+
+ rails g impressionist
+
+Run the migration
+
+ rake db:migrate
+
+The following fields are provided in the migration:
+
+ t.string "impressionable_type" # model type: Widget
+ t.integer "impressionable_id" # model instance ID: @widget.id
+ t.integer "user_id" # automatically logs @current_user.id
+ t.string "controller_name" # logs the controller name
+ t.string "action_name" # logs the action_name
+ t.string "view_name" # TODO: log individual views (as well as partials and nested partials)
+ t.string "request_hash" # unique ID per request, in case you want to log multiple impressions and associate them together
+ t.string "ip_address" # request.remote_ip
+ t.string "message" # custom message you can add
+ t.datetime "created_at" # I am not sure what this is.... Any clue?
+ t.datetime "updated_at" # never seen this one before either.... Your guess is as good as mine??
+
+== Usage
+
+Log all actions in a controller
+
+ class WidgetsController < ApplicationController
+ impressionist
+ end
+
+Specify actions you want logged in a controller
+
+ class WidgetsController < ApplicationController
+ impressionist :actions=>[:show,:index]
+ end
+
+Make your models impressionable. This allows you to attach impressions to an AR model instance.
+
+ class Widget < ActiveRecord::Base
+ is_impressionable
+ end
+
+Log an impression per model instance in your controller:
+
+ @widget = Widget.find
+ impressionist(@widget,"wtf is a widget?")
+
+== Development Roadmap
+
+* Automatic impression logging in views. For example, log initial view, and any partials called from initial view
+* Customizable black list for user-agents or IP addresses. Impressions will be ignored. Web admin as part of the Engine.
+* Reporting engine
+* AB testing integration
+
+== Contributing to impressionist
+
+* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
+* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
+* Fork the project
+* Start a feature/bugfix branch
+* Commit and push until you are happy with your contribution
+* Make sure to add rspec tests for it. Patches or features without tests will be ignored. Also, try to write better tests than I do ;-)
+* If adding engine controller or view functionality, use HAML and Inherited Resources.
+* All testing is done inside a small Rails app (test_app). You will find specs within this app.
+== Copyright
+
+Copyright (c) 2011 cowboycoded. See LICENSE.txt for
+further details.
+
87 Rakefile
@@ -0,0 +1,87 @@
+# Rakefile for the impressionist gem: packaging (Jeweler), tests, coverage
+# and RDoc tasks.
+require 'rubygems'
+require 'bundler'
+
+# Set up the :default and :development bundle groups. On a Bundler error,
+# print the message plus an install hint to stderr and exit with the
+# error's status code.
+begin
+ Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+ $stderr.puts e.message
+ $stderr.puts "Run `bundle install` to install missing gems"
+ exit e.status_code
+end
+require 'rake'
+
+# Gem metadata handed to Jeweler.
+require 'jeweler'
+Jeweler::Tasks.new do |gem|
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
+ gem.name = "impressionist"
+ gem.homepage = "http://github.com/johnmcaliley/impressionist"
+ gem.license = "MIT"
+ gem.summary = %Q{Easy way to log impressions}
+ gem.description = %Q{Log impressions from controller actions or from a model}
+ gem.email = "john.mcaliley@gmail.com"
+ gem.authors = ["cowboycoded"]
+ # Exclude "test_app" from the gem's file list.
+ gem.files.exclude "test_app"
+end
+Jeweler::RubygemsDotOrgTasks.new
+
+# `rake test`: runs files matching test/**/test_*.rb with lib and test on the load path.
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+ test.libs << 'lib' << 'test'
+ test.pattern = 'test/**/test_*.rb'
+ test.verbose = true
+end
+
+# rcov task over the same test file pattern.
+require 'rcov/rcovtask'
+Rcov::RcovTask.new do |test|
+ test.libs << 'test'
+ test.pattern = 'test/**/test_*.rb'
+ test.verbose = true
+end
+
+task :default => :test
+
+# RDoc task covering README* and lib/**/*.rb.
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+ # Contents of the VERSION file when it exists, otherwise an empty string.
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
+
+ rdoc.rdoc_dir = 'rdoc'
+ rdoc.title = "impressionist #{version}"
+ rdoc.rdoc_files.include('README*')
+ rdoc.rdoc_files.include('lib/**/*.rb')
+end
+
+namespace :version do
+ desc "create a new version, create tag and push to github"
+ task :patch_release do
+ if Jeweler::Commands::ReleaseToGit.new.clean_staging_area?
+ Rake::Task['version:bump:patch'].invoke
+ Rake::Task['gemspec:release'].invoke
+ Rake::Task['git:release'].invoke
+ else
+ puts "Commit your changed files first"
+ end
+ end
+
+ desc "create a new version, create tag and push to github"
+ task :minor_release do
+ Rake::Task['version:bump:minor'].invoke
+ Rake::Task['gemspec:release'].invoke
+ Rake::Task['git:release'].invoke
+ end
+
+ desc "create a new version, create tag and push to github"
+ task :major_release do
+ Rake::Task['version:bump:major'].invoke
+ Rake::Task['gemspec:release'].invoke
+ Rake::Task['git:release'].invoke
+ end
+end
+
+namespace :impressionist do
+ # Loads lib/impressionist/bots.rb relative to this Rakefile's directory.
+ require File.dirname(__FILE__) + "/lib/impressionist/bots"
+
+ desc "output the list of bots from http://www.user-agents.org/"
+ task :bots do
+ # Prints the return value of Impressionist::Bots.consume using Kernel#p.
+ p Impressionist::Bots.consume
+ end
+end
1  VERSION
@@ -0,0 +1 @@
+0.1.0
60 app/controllers/impressionist_controller.rb
@@ -0,0 +1,60 @@
+require 'digest/sha2'
+
+module ImpressionistController
+ module ClassMethods
+ def impressionist(opts={})
+ before_filter { |c| c.impressionist_subapp_filter opts[:actions] }
+ end
+ end
+
+ module InstanceMethods
+ def self.included(base)
+ base.before_filter :impressionist_app_filter
+ end
+
+ def impressionist(obj,message=nil)
+ unless bypass
+ if obj.respond_to?("impressionable?")
+ obj.impressions.create(message: message,
+ request_hash: @impressionist_hash,
+ ip_address: request.remote_ip,
+ user_id: user_id)
+ else
+ raise "#{obj.class.to_s} is not impressionable!"
+ end
+ end
+ end
+
+ def impressionist_app_filter
+ @impressionist_hash = Digest::SHA2.hexdigest(Time.now.to_f.to_s+rand(10000).to_s)
+ end
+
+ def impressionist_subapp_filter(actions=nil)
+ unless bypass
+ actions.collect!{|a|a.to_s} unless actions.blank?
+ if actions.blank? or actions.include?(action_name)
+ Impression.create(controller_name: controller_name,
+ action_name: action_name,
+ user_id: user_id,
+ request_hash: @request_hash,
+ request_hash: @impressionist_hash,
+ ip_address: request.remote_ip,
+ impressionable_type: controller_name.singularize.camelize,
+ impressionable_id: params[:id])
+ end
+ end
+ end
+
+ private
+ def bypass
+ Impressionist::Bots::WILD_CARDS.each do |wild_card|
+ return true if request.user_agent.include? wild_card
+ end
+ Impressionist::Bots::LIST.include? request.user_agent
+ end
+
+ def user_id
+ @current_user ? @current_user.id : nil
+ end
+ end
+end
3  app/models/impression.rb
@@ -0,0 +1,3 @@
+class Impression < ActiveRecord::Base
+ belongs_to :impressionable, :polymorphic=>true
+end
1,462 app/models/impressionist/bots.rb
@@ -0,0 +1,1462 @@
+module Impressionist
+ module Bots
+ WILD_CARDS = ["bot","yahoo","slurp","google","msn","crawler"]
+
+ LIST = ["<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)",
+ "<a href='http://www.unchaos.com/'> UnChaos Bot Hybrid Web Search Engine. </a> (vadim_gonchar@unchaos.com)",
+ "<b> UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at www.unchaos.com </b> (info@unchaos.com)",
+ "<http://www.sygol.com/> http://www.sygol.com",
+ "*/Nutch-0.9-dev",
+ "+SitiDi.net/SitiDiBot/1.0 (+Have Good Day)",
+ "-DIE-KRAEHE- META-SEARCH-ENGINE/1.1 http://www.die-kraehe.de",
+ "192.comAgent",
+ "4anything.com LinkChecker v2.0",
+ "8484 Boston Project v 1.0",
+ ":robot/1.0 (linux) ( admin e-mail: undefined http://www.neofonie.de/loesungen/search/robot.html )",
+ "A-Online Search",
+ "A1 Sitemap Generator/1.0 (+http://www.micro-sys.dk/products/sitemap-generator/) miggibot/2006.01.24",
+ "aardvark-crawler",
+ "AbachoBOT",
+ "AbachoBOT (Mozilla compatible)",
+ "ABCdatos BotLink/5.xx.xxx#BBL",
+ "Aberja Checkomat",
+ "abot/0.1 (abot; http://www.abot.com; abot@abot.com)",
+ "About/0.1libwww-perl/5.47",
+ "Accelatech RSSCrawler/0.4",
+ "accoona",
+ "Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)",
+ "Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)",
+ "Ack (http://www.ackerm.com/)",
+ "AcoiRobot",
+ "Acoon Robot v1.50.001",
+ "Acoon Robot v1.52 (http://www.acoon.de)",
+ "Acoon-Robot 4.0.x.[xx] (http://www.acoon.de)",
+ "Acoon-Robot v3.xx (http://www.acoon.de and http://www.acoon.com)",
+ "Acorn/Nutch-0.9 (Non-Profit Search Engine; acorn.isara.org; acorn at isara dot org)",
+ "AESOP_com_SpiderMan",
+ "agadine/1.x.x (+http://www.agada.de)",
+ "Agent-SharewarePlazaFileCheckBot/2.0+(+http://www.SharewarePlaza.com)",
+ "AgentName/0.1 libwww-perl/5.48",
+ "AIBOT/2.1 By +(www.21seek.com A Real artificial intelligence search engine China)",
+ "aipbot/1.0 (aipbot; http://www.aipbot.com; aipbot@aipbot.com)",
+ "aipbot/2-beta (aipbot dev; http://aipbot.com; aipbot@aipbot.com)",
+ "Aladin/3.324",
+ "Aleksika Spider/1.0 (+http://www.aleksika.com/)",
+ "AlkalineBOT/1.3",
+ "AlkalineBOT/1.4 (1.4.0326.0 RTM)",
+ "Allesklar/0.1 libwww-perl/5.46",
+ "Allrati/1.1 (+)",
+ "AltaVista Intranet V2.0 AVS EVAL search@freeit.com",
+ "AltaVista Intranet V2.0 Compaq Altavista Eval sveand@altavista.net",
+ "AltaVista Intranet V2.0 evreka.com crawler@evreka.com",
+ "AltaVista V2.0B crawler@evreka.com",
+ "AmfibiBOT",
+ "Amfibibot/0.06 (Amfibi Web Search; http://www.amfibi.com; agent@amfibi.com)",
+ "Amfibibot/0.07 (Amfibi Robot; http://www.amfibi.com; agent@amfibi.com)",
+ "amibot",
+ "AnnoMille spider 0.1 alpha - http://www.annomille.it",
+ "AnswerBus (http://www.answerbus.com/)",
+ "antibot-V1.1.5/i586-linux-2.2",
+ "AnzwersCrawl/2.0 (anzwerscrawl@anzwers.com.au;Engine)",
+ "Apexoo Spider 1.x",
+ "Aport",
+ "appie 1.1 (www.walhello.com)",
+ "ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4; http://www.araby.com;)",
+ "ArachBot",
+ "Arachnoidea (arachnoidea@euroseek.com)",
+ "ArchitextSpider",
+ "archive.org_bot",
+ "Arikus_Spider",
+ "Arquivo-web-crawler (compatible; heritrix/1.12.1 +http://arquivo-web.fccn.pt)",
+ "ASAHA Search Engine Turkey V.001 (http://www.asaha.com/)",
+ "Asahina-Antenna/1.x",
+ "Asahina-Antenna/1.x (libhina.pl/x.x ; libtime.pl/x.x)",
+ "ask.24x.info",
+ "AskAboutOil/0.06-rcp (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@askaboutoil.com)",
+ "asked/Nutch-0.8 (web crawler; http://asked.jp; epicurus at gmail dot com)",
+ "ASPSeek/1.2.5",
+ "ASPseek/1.2.9d",
+ "ASPSeek/1.2.x",
+ "ASPSeek/1.2.xa",
+ "ASPseek/1.2.xx",
+ "ASPSeek/1.2.xxpre",
+ "ASSORT/0.10",
+ "asterias/2.0",
+ "AtlocalBot/1.1 +(http://www.atlocal.com/local-web-site-owner.html)",
+ "Atomic_Email_Hunter/4.0",
+ "Atomz/1.0",
+ "atSpider/1.0",
+ "Attentio/Nutch-0.9-dev (Attentio's beta blog crawler; www.attentio.com; info@attentio.com)",
+ "augurfind",
+ "augurnfind V-1.x",
+ "autoemailspider",
+ "autowebdir 1.1 (www.autowebdir.com)",
+ "AV Fetch 1.0",
+ "AVSearch-1.0(peter.turney@nrc.ca)",
+ "AVSearch-3.0(AltaVista/AVC)",
+ "axadine/ (Axadine Crawler; http://www.axada.de/; )",
+ "AxmoRobot - Crawling your site for better indexing on www.axmo.com search engine.",
+ "BabalooSpider/1.3 (BabalooSpider; http://www.babaloo.si; spider@babaloo.si)",
+ "BaboomBot/1.x.x (+http://www.baboom.us)",
+ "BaiduImagespider+(+http://www.baidu.jp/search/s308.html)",
+ "BaiDuSpider",
+ "Baiduspider+(+http://help.baidu.jp/system/05.html)",
+ "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
+ "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
+ "Balihoo/Nutch-1.0-dev (Crawler for Balihoo.com search engine - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
+ "BarraHomeCrawler (albertof@barrahome.org)",
+ "bdcindexer_2.6.2 (research@bdc)",
+ "BDFetch",
+ "BDNcentral Crawler v2.3 [en] (http://www.bdncentral.com/robot.html) (X11; I; Linux 2.0.44 i686)",
+ "beautybot/1.0 (+http://www.uchoose.de/crawler/beautybot/)",
+ "BebopBot/2.5.1 ( crawler http://www.apassion4jazz.net/bebopbot.html )",
+ "BigCliqueBOT/1.03-dev (bigclicbot; http://www.bigclique.com; bot@bigclique.com)",
+ "BIGLOTRON (Beta 2;GNU/Linux)",
+ "Bigsearch.ca/Nutch-x.x-dev (Bigsearch.ca Internet Spider; http://www.bigsearch.ca/; info@enhancededge.com)",
+ "BilgiBetaBot/0.8-dev (bilgi.com (Beta) ; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
+ "BilgiBot/1.0(beta) (http://www.bilgi.com/; bilgi at bilgi dot com)",
+ "Bitacle bot/1.1",
+ "Bitacle Robot (V:1.0;) (http://www.bitacle.com)",
+ "BlackWidow",
+ "Blaiz-Bee/1.0 (+http://www.blaiz.net)",
+ "Blaiz-Bee/2.00.8222 (BE Internet Search Engine http://www.rawgrunt.com)",
+ "Blaiz-Bee/2.00.xxxx (+http://www.blaiz.net)",
+ "BlitzBOT@tricus.net",
+ "BlitzBOT@tricus.net (Mozilla compatible)",
+ "BlogBot/1.x",
+ "Bloglines Title Fetch/1.0 (http://www.bloglines.com)",
+ "Bloglines-Images/0.1 (http://www.bloglines.com)",
+ "Bloglines/3.1 (http://www.bloglines.com)",
+ "Blogpulse (info@blogpulse.com)",
+ "BlogPulseLive (support@blogpulse.com)",
+ "BlogSearch/1.x +http://www.icerocket.com/",
+ "blogsearchbot-pumpkin-3",
+ "BlogsNowBot, V 2.01 (+http://www.blogsnow.com/)",
+ "BlogVibeBot-v1.1 (spider@blogvibe.nl)",
+ "blogWatcher_Spider/0.1 (http://www.lr.pi.titech.ac.jp/blogWatcher/)",
+ "BlogzIce/1.0 (+http://icerocket.com; rhodes@icerocket.com)",
+ "BlogzIce/1.0 +http://www.icerocket.com/",
+ "BloobyBot",
+ "Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
+ "boitho.com-dc/0.xx (http://www.boitho.com/dcbot.html)",
+ "boitho.com-robot/1.x",
+ "boitho.com-robot/1.x (http://www.boitho.com/bot.html)",
+ "BPImageWalker/2.0 (www.bdbrandprotect.com)",
+ "BravoBrian SpiderEngine MarcoPolo",
+ "BruinBot (+http://webarchive.cs.ucla.edu/bruinbot.html) ",
+ "BSDSeek/1.0",
+ "BTbot/0.x (+http://www.btbot.com/btbot.html)",
+ "BuildCMS crawler (http://www.buildcms.com/crawler)",
+ "BullsEye",
+ "bumblebee@relevare.com",
+ "BurstFindCrawler/1.1 (crawler.burstfind.com; http://crawler.burstfind.com; crawler@burstfind.com)",
+ "Buscaplus Robi/1.0 (http://www.buscaplus.com/robi/)",
+ "bwh3_user_agent",
+ "Cabot/Nutch-0.9 (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
+ "Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
+ "carleson/1.0",
+ "Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->http://www.andrew.cmu.edu/~brgordon/webbot/index.html http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
+ "Carnegie_Mellon_University_WebCrawler http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
+ "Catall Spider",
+ "CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler; http://www.cazoodle.com/cazoodlebot; cazoodlebot@cazoodle.com)",
+ "CCBot/1.0 (+http://www.commoncrawl.org/bot.html)",
+ "ccubee/x.x",
+ "Ceramic Tile Installation Guide (http://www.floorstransformed.com)",
+ "cfetch/1.0",
+ "China Local Browse 2.6",
+ "ChristCRAWLER 2.0",
+ "CipinetBot (http://www.cipinet.com/bot.html)",
+ "ClariaBot/1.0",
+ "Claymont.com",
+ "CloakDetect/0.9 (+http://fulltext.seznam.cz/)",
+ "Clushbot/2.x (+http://www.clush.com/bot.html)",
+ "Clushbot/3.x-BinaryFury (+http://www.clush.com/bot.html)",
+ "Clushbot/3.xx-Ajax (+http://www.clush.com/bot.html)",
+ "Clushbot/3.xx-Hector (+http://www.clush.com/bot.html)",
+ "Clushbot/3.xx-Peleus (+http://www.clush.com/bot.html)",
+ "Cogentbot/1.X (+http://www.cogentsoftwaresolutions.com/bot.html)",
+ "combine/0.0",
+ "Combine/2.0 http://combine.it.lth.se/",
+ "Combine/3 http://combine.it.lth.se/",
+ "Combine/x.0",
+ "cometrics-bot, http://www.cometrics.de",
+ "Computer_and_Automation_Research_Institute_Crawler crawler@ilab.sztaki.hu",
+ "Comrite/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
+ "ContactBot/0.2",
+ "ContentSmartz",
+ "Convera Internet Spider V6.x",
+ "ConveraCrawler/0.2",
+ "ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)",
+ "ConveraMultiMediaCrawler/0.1 (+http://www.authoritativeweb.com/crawl)",
+ "CoolBot",
+ "cosmos/0.8_(robot@xyleme.com)",
+ "cosmos/0.9_(robot@xyleme.com)",
+ "CougarSearch/0.x (+http://www.cougarsearch.com/faq.shtml)",
+ "Covac TexAs Arachbot",
+ "Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
+ "Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
+ "CrawlConvera0.1 (CrawlConvera@yahoo.com)",
+ "Crawler (cometsearch@cometsystems.com)",
+ "Crawler admin@crawler.de",
+ "Crawler V 0.2.x admin@crawler.de",
+ "crawler@alexa.com",
+ "CrawlerBoy Pinpoint.com",
+ "Crawllybot/0.1 (Crawllybot; +http://www.crawlly.com; crawler@crawlly.com)",
+ "CreativeCommons/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)",
+ "CrocCrawler vx.3 [en] (http://www.croccrawler.com) (X11; I; Linux 2.0.44 i686)",
+ "csci_b659/0.13",
+ "Cuasarbot/0.9b http://www.cuasar.com/spider_beta/ ",
+ "CurryGuide SiteScan 1.1",
+ "Custom Spider www.bisnisseek.com /1.0",
+ "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)",
+ "CydralSpider/1.x (Cydral Web Image Search; http://www.cydral.com)",
+ "CydralSpider/3.0 (Cydral Image Search; http://www.cydral.com)",
+ "DataCha0s/2.0",
+ "DataCha0s/2.0",
+ "DataFountains/DMOZ Downloader",
+ "DataFountains/Dmoz Downloader (http://ivia.ucr.edu/useragents.shtml)",
+ "DataFountains/DMOZ Feature Vector Corpus Creator (http://ivia.ucr.edu/useragents.shtml)",
+ "DataparkSearch/4.47 (+http://dataparksearch.org/bot)",
+ "DataparkSearch/4.xx (http://www.dataparksearch.org/)",
+ "DataSpear/1.0 (Spider; http://www.dataspear.com/spider.html; spider@dataspear.com)",
+ "DataSpearSpiderBot/0.2 (DataSpear Spider Bot; http://dssb.dataspear.com/bot.html; dssb@dataspear.com)",
+ "DatenBot( http://www.sicher-durchs-netz.de/bot.html)",
+ "DaviesBot/1.7 (www.wholeweb.net)",
+ "daypopbot/0.x",
+ "dbDig(http://www.prairielandconsulting.com)",
+ "DBrowse 1.4b",
+ "DBrowse 1.4d",
+ "dCSbot/1.1",
+ "de.searchengine.comBot 1.2 (http://de.searchengine.com/spider)",
+ "deepak-USC/ISI",
+ "DeepIndex",
+ "DeepIndex ( http://www.zetbot.com )",
+ "DeepIndex (www.en.deepindex.com)",
+ "DeepIndexer.ca",
+ "Demo Bot DOT 16b",
+ "Demo Bot Z 16b",
+ "Denmex websearch (http://search.denmex.com)",
+ "dev-spider2.searchpsider.com/1.3b",
+ "DiaGem/1.1 (http://www.skyrocket.gr.jp/diagem.html)",
+ "Diamond/x.0",
+ "DiamondBot",
+ "Digger/1.0 JDK/1.3.0rc3",
+ "DigOut4U",
+ "DIIbot/1.2",
+ "disco/Nutch-0.9 (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
+ "disco/Nutch-1.0-dev (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
+ "DittoSpyder",
+ "dloader(NaverRobot)/1.0",
+ "DoCoMo/1.0/Nxxxi/c10",
+ "DoCoMo/1.0/Nxxxi/c10/TB",
+ "DoCoMo/2.0 P900iV(c100;TB;W24H11) ",
+ "DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
+ "DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
+ "dodgebot/experimental",
+ "Download-Tipp Linkcheck (http://download-tipp.de/)",
+ "Drecombot/1.0 (http://career.drecom.jp/bot.html)",
+ "DSurf15a 01",
+ "DSurf15a 71",
+ "DSurf15a 81",
+ "DSurf15a VA",
+ "dtSearchSpider",
+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
+ "Dumbot(version 0.1 beta - dumbfind.com)",
+ "Dumbot(version 0.1 beta - http://www.dumbfind.com/dumbot.html)",
+ "Dumbot(version 0.1 beta)",
+ "e-sense 1.0 ea(www.vigiltech.com/esensedisclaim.html)",
+ "e-SocietyRobot(http://www.yama.info.waseda.ac.jp/~yamana/es/)",
+ "eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +http://www.eapollo-opto.com)",
+ "EARTHCOM.info/1.x [www.earthcom.info]",
+ "EARTHCOM.info/1.xbeta [www.earthcom.info]",
+ "EasyDL/3.xx",
+ "EasyDL/3.xx http://keywen.com/Encyclopedia/Bot",
+ "EBrowse 1.4b",
+ "EchO!/2.0",
+ "Educate Search VxB",
+ "egothor/3.0a (+http://www.xdefine.org/robot.html)",
+ "EgotoBot/4.8 (+http://www.egoto.com/about.htm)",
+ "ejupiter.com",
+ "elfbot/1.0 (+http://www.uchoose.de/crawler/elfbot/)",
+ "ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +http://ws.daum.net/aboutkr.html)",
+ "EmailSiphon",
+ "EmailSpider",
+ "EmailWolf 1.00",
+ "EMPAS_ROBOT",
+ "EnaBot/1.x (http://www.enaball.com/crawler.html)",
+ "Enfish Tracker",
+ "Enterprise_Search/1.0",
+ "Enterprise_Search/1.0.xxx",
+ "Enterprise_Search/1.00.xxx;MSSQL (http://www.innerprise.net/es-spider.asp)",
+ "envolk/1.7 (+http://www.envolk.com/envolkspiderinfo.php)",
+ "envolk[ITS]spider/1.6(+http://www.envolk.com/envolkspider.html)",
+ "EroCrawler",
+ "ES.NET_Crawler/2.0 (http://search.innerprise.net/)",
+ "eseek-larbin_2.6.2 (crawler@exactseek.com)",
+ "ESISmartSpider",
+ "eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)",
+ "ESurf15a 15",
+ "EuripBot/0.x (+http://www.eurip.com) GetFile",
+ "EuripBot/0.x (+http://www.eurip.com) GetRobots",
+ "EuripBot/0.x (+http://www.eurip.com) PreCheck",
+ "Eurobot/1.0 (http://www.ayell.eu)",
+ "EvaalSE - bot@evaal.com",
+ "eventax/1.3 (eventax; http://www.eventax.de/; info@eventax.de)",
+ "Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24; http://everest.vulcan.com/crawlerhelp)",
+ "Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)",
+ "Exabot-Images/1.0",
+ "Exabot-Test/1.0",
+ "Exabot/2.0",
+ "Exabot/3.0",
+ "ExactSeek Crawler/0.1",
+ "exactseek-crawler-2.63 (crawler@exactseek.com)",
+ "exactseek-pagereaper-2.63 (crawler@exactseek.com)",
+ "exactseek.com",
+ "Exalead NG/MimeLive Client (convert/http/0.120)",
+ "Excalibur Internet Spider V6.5.4",
+ "Execrawl/1.0 (Execrawl; http://www.execrawl.com/; bot@execrawl.com)",
+ "exooba crawler/exooba crawler (crawler for exooba.com; http://www.exooba.com/; info at exooba dot com)",
+ "exooba/exooba crawler (exooba; exooba)",
+ "ExperimentalHenrytheMiragoRobot",
+ "ExtractorPro",
+ "EyeCatcher (Download-tipp.de)/1.0",
+ "Factbot 1.09 (see http://www.factbites.com/webmasters.php)",
+ "factbot : http://www.factbites.com/robots",
+ "Fast Crawler Gold Edition",
+ "FAST Enterprise Crawler 6 (Experimental)",
+ "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/",
+ "FAST Enterprise Crawler 6 used by Cobra Development (admin@fastsearch.com)",
+ "FAST Enterprise Crawler 6 used by Comperio AS (sts@comperio.no)",
+ "FAST Enterprise Crawler 6 used by FAST (FAST)",
+ "FAST Enterprise Crawler 6 used by Pages Jaunes (pvincent@pagesjaunes.fr)",
+ "FAST Enterprise Crawler 6 used by Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
+ "FAST Enterprise Crawler 6 used by Singapore Press Holdings (crawler@sphsearch.sg)",
+ "FAST Enterprise Crawler/6 (www.fastsearch.com)",
+ "FAST Enterprise Crawler/6.4 (helpdesk at fast.no)",
+ "FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)",
+ "FAST MetaWeb Crawler (helpdesk at fastsearch dot com)",
+ "Fast PartnerSite Crawler",
+ "FAST-WebCrawler/2.2.10 (Multimedia Search) (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
+ "FAST-WebCrawler/2.2.6 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
+ "FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
+ "FAST-WebCrawler/2.2.8 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
+ "FAST-WebCrawler/3.2 test",
+ "FAST-WebCrawler/3.3 (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
+ "FAST-WebCrawler/3.4/Nirvana (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
+ "FAST-WebCrawler/3.4/PartnerSite (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
+ "FAST-WebCrawler/3.5 (atw-crawler at fast dot no; http://fast.no/support.php?c=faqs/crawler)",
+ "FAST-WebCrawler/3.6 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.6/FirstPage (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
+ "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.8 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "FAST-WebCrawler/3.x Multimedia",
+ "FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)",
+ "fastbot crawler beta 2.0 (+http://www.fastbot.de)",
+ "FastBug http://www.ay-up.com",
+ "FastCrawler 3.0.1 (crawler@1klik.dk)",
+ "FastSearch Web Crawler for Verizon SuperPages (kevin.watters@fastsearch.com)",
+ "Favcollector/2.0 (info@favcollector.com http://www.favcollector.com/)",
+ "favo.eu crawler/0.6 (http://www.favo.eu)",
+ "Faxobot/1.0",
+ "Feed Seeker Bot (RSS Feed Seeker http://www.MyNewFavoriteThing.com/fsb.php)",
+ "Feed24.com",
+ "FeedChecker/0.01",
+ "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)",
+ "FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)",
+ "FeedHub MetaDataFetcher/1.0 (http://www.feedhub.com)",
+ "Feedjit Favicon Crawler 1.0",
+ "Feedster Crawler/3.0; Feedster, Inc.",
+ "Felix - Mixcat Crawler (+http://mixcat.com)",
+ "FFC Trap Door Spider",
+ "Filtrbox/1.0",
+ "Findexa Crawler (http://www.findexa.no/gulesider/article26548.ece)",
+ "findlinks/x.xxx (+http://wortschatz.uni-leipzig.de/findlinks/) ",
+ "FineBot",
+ "Firefly/1.0",
+ "Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)",
+ "Firefox (kastaneta03@hotmail.com)",
+ "Firefox_1.0.6 (kasparek@naparek.cz)",
+ "FirstGov.gov Search - POC:firstgov.webmasters@gsa.gov",
+ "firstsbot",
+ "Flapbot/0.7.2 (Flaptor Crawler; http://www.flaptor.com; crawler at flaptor period com)",
+ "Flexum spider",
+ "Flexum/2.0",
+ "FlickBot 2.0 RPT-HTTPClient/0.3-3",
+ "flunky",
+ "FnooleBot/2.5.2 (+http://www.fnoole.com/addurl.html)",
+ "FocusedSampler/1.0",
+ "Folkd.com Spider/0.1 beta 1 (www.folkd.com)",
+ "Fooky.com/ScorpionBot/ScoutOut; http://www.fooky.com/scorpionbots",
+ "Francis/1.0 (francis@neomo.de http://www.neomo.de/)",
+ "Franklin Locator 1.8",
+ "FreeFind.com-SiteSearchEngine/1.0 (http://freefind.com; spiderinfo@freefind.com)",
+ "FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com",
+ "FSurf15a 01",
+ "FTB-Bot http://www.findthebest.co.uk/",
+ "Full Web Bot 0416B",
+ "Full Web Bot 0516B",
+ "Full Web Bot 2816B",
+ "FuseBulb.Com",
+ "FyberSpider (+http://www.fybersearch.com/fyberspider.php)",
+ "GAIS Robot/1.0B2",
+ "Gaisbot/3.0 (indexer@gais.cs.ccu.edu.tw; http://gais.cs.ccu.edu.tw/robot.php)",
+ "Gaisbot/3.0+(robot06@gais.cs.ccu.edu.tw;+http://gais.cs.ccu.edu.tw/robot.php)",
+ "GalaxyBot/1.0 (http://www.galaxy.com/galaxybot.html)",
+ "Gallent Search Spider v1.4 Robot 2 (http://robot.GallentSearch.com)",
+ "gamekitbot/1.0 (+http://www.uchoose.de/crawler/gamekitbot/)",
+ "GammaSpider/1.0",
+ "gazz/x.x (gazz@nttrd.com)",
+ "generic_crawler/01.0217/",
+ "genieBot (http://64.5.245.11/faq/faq.html)",
+ "geniebot wgao@genieknows.com",
+ "GeonaBot 1.x; http://www.geona.com/",
+ "gigabaz/3.1x (baz@gigabaz.com; http://gigabaz.com/gigabaz/)",
+ "Gigabot/2.0 (gigablast.com)",
+ "Gigabot/2.0/gigablast.com/spider.html",
+ "Gigabot/2.0; http://www.gigablast.com/spider.html",
+ "Gigabot/2.0att",
+ "Gigabot/3.0 (http://www.gigablast.com/spider.html)",
+ "Gigabot/x.0",
+ "GigabotSiteSearch/2.0 (sitesearch.gigablast.com)",
+ "GNODSPIDER (www.gnod.net)",
+ "Goblin/0.9 (http://www.goguides.org/)",
+ "Goblin/0.9.x (http://www.goguides.org/goblin-info.html)",
+ "GoForIt.com",
+ "GOFORITBOT ( http://www.goforit.com/about/ )",
+ "gonzo1[P] +http://www.suchen.de/popups/faq.jsp",
+ "gonzo2[P] +http://www.suchen.de/faq.html",
+ "Goofer/0.2",
+ "Googlebot-Image/1.0",
+ "Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)",
+ "Googlebot/2.1 ( http://www.google.com/bot.html)",
+ "Googlebot/2.1 ( http://www.googlebot.com/bot.html)",
+ "Googlebot/Test ( http://www.googlebot.com/bot.html)",
+ "GrapeFX/0.3 libwww/5.4.0",
+ "great-plains-web-spider/flatlandbot (Flatland Industries Web Spider; http://www.flatlandindustries.com/flatlandbot.php; jason@flatlandindustries.com)",
+ "GrigorBot 0.8 (http://www.grigor.biz/bot.html)",
+ "Gromit/1.0",
+ "grub crawler(http://www.grub.org)",
+ "grub-client",
+ "gsa-crawler (Enterprise; GID-01422; jplastiras@google.com)",
+ "gsa-crawler (Enterprise; GID-01742;gsatesting@rediffmail.com)",
+ "gsa-crawler (Enterprise; GIX-02057; dm@enhesa.com)",
+ "gsa-crawler (Enterprise; GIX-03519; cknuetter@stubhub.com)",
+ "gsa-crawler (Enterprise; GIX-0xxxx; enterprise-training@google.com)",
+ "Guestbook Auto Submitter",
+ "Gulliver/1.3",
+ "Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
+ "Gungho/0.08004 (http://code.google.com/p/gungho-crawler/wiki/Index)",
+ "GurujiBot/1.0 (+http://www.guruji.com/WebmasterFAQ.html)",
+ "GurujiImageBot/1.0 (+http://www.guruji.com/en/WebmasterFAQ.html)",
+ "HappyFunBot/1.1",
+ "Harvest-NG/1.0.2",
+ "Hatena Antenna/0.4 (http://a.hatena.ne.jp/help#robot)",
+ "Hatena Pagetitle Agent/1.0",
+ "Hatena RSS/0.3 (http://r.hatena.ne.jp)",
+ "hbtronix.spider.2 -- http://hbtronix.de/spider.php",
+ "HeinrichderMiragoRobot",
+ "HeinrichderMiragoRobot (http://www.miragorobot.com/scripts/deinfo.asp)",
+ "Helix/1.x ( http://www.sitesearch.ca/helix/)",
+ "HenriLeRobotMirago (http://www.miragorobot.com/scripts/frinfo.asp)",
+ "HenrytheMiragoRobot",
+ "HenryTheMiragoRobot (http://www.miragorobot.com/scripts/mrinfo.asp)",
+ "Hi! I'm CsCrawler my homepage: http://www.kde.cs.uni-kassel.de/lehre/ss2005/googlespam/crawler.html RPT-HTTPClient/0.3-3",
+ "Hippias/0.9 Beta",
+ "HitList",
+ "Hitwise Spider v1.0 http://www.hitwise.com",
+ "holmes/3.11 (http://morfeo.centrum.cz/bot)",
+ "holmes/3.9 (onet.pl)",
+ "holmes/3.xx (OnetSzukaj/5.0; +http://szukaj.onet.pl)",
+ "holmes/x.x",
+ "HolmesBot (http://holmes.ge)",
+ "HomePageSearch(hpsearch.uni-trier.de)",
+ "Homerbot: www.homerweb.com",
+ "Honda-Search/0.7.2 (Nutch; http://lucene.apache.org/nutch/bot.html; search@honda-search.com)",
+ "HooWWWer/2.1.3 (debugging run) (+http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-info<at>hiit.fi)",
+ "HooWWWer/2.1.x ( http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-info<at>hiit.fi)",
+ "HPL/Nutch-0.9 -",
+ "htdig/3.1.6 (http://computerorgs.com)",
+ "htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)",
+ "htdig/3.1.x (root@localhost)",
+ "http://Ask.24x.Info/ (http://narres.it/)",
+ "http://hilfe.acont.de/bot.html ACONTBOT",
+ "http://www.almaden.ibm.com/cs/crawler",
+ "http://www.almaden.ibm.com/cs/crawler [rc1.wf.ibm.com]",
+ "http://www.almaden.ibm.com/cs/crawler [wf216]",
+ "http://www.istarthere.com_spider@istarthere.com",
+ "http://www.monogol.de",
+ "http://www.trendtech.dk/spider.asp)",
+ "i1searchbot/2.0 (i1search web crawler; http://www.i1search.com; crawler@i1search.com)",
+ "IAArchiver-1.0",
+ "iaskspider2 (iask@staff.sina.com.cn)",
+ "ia_archiver",
+ "ia_archiver-web.archive.org",
+ "ia_archiver/1.6",
+ "ICC-Crawler(Mozilla-compatible; http://kc.nict.go.jp/icc/crawl.html; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)",
+ "ICC-Crawler(Mozilla-compatible;http://kc.nict.go.jp/icc/crawl.html;icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)",
+ "iCCrawler (http://www.iccenter.net)",
+ "ICCrawler - ICjobs (http://www.icjobs.de/bot.htm)",
+ "ichiro/x.0 (http://help.goo.ne.jp/door/crawler.html)",
+ "ichiro/x.0 (ichiro@nttr.co.jp)",
+ "IconSurf/2.0 favicon finder (see http://iconsurf.com/robot.html)",
+ "IconSurf/2.0 favicon monitor (see http://iconsurf.com/robot.html)",
+ "ICRA_label_spider/x.0",
+ "icsbot-0.1",
+ "ideare - SignSite/1.x",
+ "iFeed.jp/2.0 (www.psychedelix.com/agents/agents.rss; 0 subscribers)",
+ "igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)",
+ "IIITBOT/1.1 (Indian Language Web Search Engine; http://webkhoj.iiit.net; pvvpr at iiit dot ac dot in)",
+ "ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit http://www.ilial.com/crawler; http://www.ilial.com/crawler; crawl@ilial.com)",
+ "ilial/Nutch-0.9-dev",
+ "IlseBot/1.x",
+ "IlTrovatore-Setaccio ( http://www.iltrovatore.it)",
+ "Iltrovatore-Setaccio/0.3-dev (Indexing; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
+ "IlTrovatore-Setaccio/1.2 ( http://www.iltrovatore.it/aiuto/faq.html)",
+ "Iltrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
+ "iltrovatore-setaccio/1.2-dev (spidering; http://www.iltrovatore.it/aiuto/.....)",
+ "IlTrovatore/1.2 (IlTrovatore; http://www.iltrovatore.it/bot.html; bot@iltrovatore.it)",
+ "ImageWalker/2.0 (www.bdbrandprotect.com)",
+ "IncyWincy data gatherer(webmaster@loopimprovements.com",
+ "IncyWincy page crawler(webmaster@loopimprovements.com",
+ "IncyWincy(http://www.look.com)",
+ "IncyWincy(http://www.loopimprovements.com/robot.html)",
+ "IncyWincy/2.1(loopimprovements.com/robot.html)",
+ "IndexTheWeb.com Crawler7",
+ "Industry Program 1.0.x",
+ "Inet library",
+ "info@pubblisito.com- (http://www.pubblisito.com) il Sud dei Motori di Ricerca",
+ "InfoFly/1.0 (http://www.versions-project.org/)",
+ "INFOMINE/8.0 Adders",
+ "INFOMINE/8.0 RemoteServices",
+ "INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)",
+ "InfoNaviRobot(F107)",
+ "InfoSeek Sidewinder/0.9",
+ "InfoSeek Sidewinder/1.0A",
+ "InfoSeek Sidewinder/1.1A",
+ "Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)",
+ "Infoseek SideWinder/2.0B (Linux 2.4 i686)",
+ "INGRID/3.0 MT (webcrawler@NOSPAMexperimental.net; http://webmaster.ilse.nl/jsp/webmaster.jsp)",
+ "Inktomi Search",
+ "InnerpriseBot/1.0 (http://www.innerprise.com/)",
+ "Insitor.com search and find world wide!",
+ "Insitornaut",
+ "Internet Ninja x.0",
+ "InternetArchive/0.8-dev(Nutch;http://lucene.apache.org/nutch/bot.html;nutch-agent@lucene.apache",
+ "InternetSeer.com",
+ "IOI/2.0 (ISC Open Index crawler; http://index.isc.org/; bot@index.isc.org)",
+ "IPiumBot laurion(dot)com",
+ "IpselonBot/0.xx-beta (Ipselon; http://www.ipselon.com; ipselonbot@ipselon.com)",
+ "IRLbot/1.0 ( http://irl.cs.tamu.edu/crawler)",
+ "IRLbot/3.0 (compatible; MSIE 6.0; http://irl.cs.tamu.edu/crawler/)",
+ "ISC Systems iRc Search 2.1",
+ "IUPUI Research Bot v 1.9a",
+ "IWAgent/ 1.0 - www.brandprotect.com",
+ "Jabot/6.x (http://odin.ingrid.org/)",
+ "Jabot/7.x.x (http://odin.ingrid.org/)",
+ "Jack",
+ "Jambot/0.1.x (Jambot; http://www.jambot.com/blog; crawler@jambot.com)",
+ "Jambot/0.2.1 (Jambot; http://www.jambot.com/blog/static.php?page=webmaster-robot; crawler@jambot.com)",
+ "Jayde Crawler. http://www.jayde.com",
+ "Jetbot/1.0",
+ "JobSpider_BA/1.1",
+ "Jyxobot/x",
+ "k2spider",
+ "KAIST AITrc Crawler",
+ "KakleBot - www.kakle.com/0.1 (KakleBot - www.kakle.com; http:// www.kakle.com/bot.html; support@kakle.com)",
+ "kalooga/kalooga-4.0-dev-datahouse (Kalooga; http://www.kalooga.com; info@kalooga.com)",
+ "kalooga/KaloogaBot (Kalooga; http://www.kalooga.com/info.html?page=crawler; crawler@kalooga.com)",
+ "Kenjin Spider",
+ "Kevin http://dznet.com/kevin/",
+ "Kevin http://websitealert.net/kevin/",
+ "KE_1.0/2.0 libwww/5.2.8",
+ "KFSW-Bot (Version: 1.01 powered by KFSW www.kfsw.de)",
+ "kinja-imagebot (http://www.kinja.com/)",
+ "kinjabot (http://www.kinja.com)",
+ "KIT-Fireball/2.0",
+ "KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)",
+ "KnowItAll(knowitall@cs.washington.edu)",
+ "Knowledge.com/0.x",
+ "Krugle/Krugle,Nutch/0.8+ (Krugle web crawler; http://www.krugle.com/crawler/info.html; webcrawler@krugle.com)",
+ "KSbot/1.0 (KnowledgeStorm crawler; http://www.knowledgestorm.com/resources/content/crawler/index.html; crawleradmin@knowledgestorm.com)",
+ "kuloko-bot/0.x",
+ "kulokobot www.kuloko.com kuloko@backweave.com",
+ "kulturarw3/0.1",
+ "LapozzBot/1.4 ( http://robot.lapozz.com)",
+ "LapozzBot/1.5 (+http://robot.lapozz.hu)",
+ "larbin (samualt9@bigfoot.com)",
+ "LARBIN-EXPERIMENTAL (efp@gmx.net)",
+ "larbin_2.1.1 larbin2.1.1@somewhere.com",
+ "larbin_2.2.0 (crawl@compete.com)",
+ "larbin_2.2.1_de_Viennot (Laurent.Viennot@inria.fr)",
+ "larbin_2.2.2 (sugayama@lab7.kuis.kyoto-u.ac.jp)",
+ "larbin_2.2.2_guillaume (guillaume@liafa.jussieu.fr)",
+ "larbin_2.6.0 (larbin2.6.0@unspecified.mail)",
+ "larbin_2.6.1 (larbin2.6.1@unspecified.mail)",
+ "larbin_2.6.2 (hamasaki@grad.nii.ac.jp)",
+ "larbin_2.6.2 (larbin2.6.2@unspecified.mail)",
+ "larbin_2.6.2 (listonATccDOTgatechDOTedu)",
+ "larbin_2.6.2 (pimenas@systems.tuc.gr)",
+ "larbin_2.6.2 (tom@lemurconsulting.com)",
+ "larbin_2.6.2 (vitalbox1@hotmail.com)",
+ "larbin_2.6.3 (ltaa_web_crawler@groupes.epfl.ch)",
+ "larbin_2.6.3 (wgao@genieknows.com)",
+ "larbin_2.6.3_for_(http://cosco.hiit.fi/search/) tsilande@hiit.fi",
+ "larbin_2.6_basileocaml (basile.starynkevitch@cea.fr)",
+ "larbin_devel (http://pauillac.inria.fr/~ailleret/prog/larbin/)",
+ "lawinfo-crawler/Nutch-0.9-dev (Crawler for lawinfo.com pages; http://www.lawinfo.com; webmaster@lawinfo.com)",
+ "LECodeChecker/3.0 libgetdoc/1.0",
+ "LEIA/2.90",
+ "LEIA/3.01pr (LEIAcrawler; [SNIP])",
+ "LetsCrawl.com/1.0 +http://letscrawl.com/",
+ "LexiBot/1.00",
+ "Libby_1.1/libwww-perl/5.47",
+ "LibertyW (+http://www.lw01.com)",
+ "libWeb/clsHTTP -- hiongun@kt.co.kr",
+ "libwww-perl/5.41",
+ "libwww-perl/5.45",
+ "libwww-perl/5.48",
+ "libwww-perl/5.52 FP/2.1",
+ "libwww-perl/5.52 FP/4.0",
+ "libwww-perl/5.65",
+ "libwww-perl/5.800",
+ "libwww/5.3.2",
+ "LijitSpider/Nutch-0.9 (Reports crawler; http://www.lijit.com/; info(a)lijit(d)com)",
+ "Lincoln State Web Browser",
+ "linkbot",
+ "linknzbot",
+ "Links 2.0 (http://gossamer-threads.com/scripts/links/)",
+ "Links SQL (http://gossamer-threads.com/scripts/links-sql/)",
+ "LinkScan/11.0beta2 UnixShareware robot from Elsop.com (used by Indiafocus/Indiainfo)",
+ "LinkScan/9.0g Unix",
+ "LinkScan/x.x Unix",
+ "LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw; http://wkd.iis.sinica.edu.tw/LiveTrans/)",
+ "Llaut/1.0 (http://mnm.uib.es/~gallir/llaut/bot.html)",
+ "LMQueueBot/0.2",
+ "lmspider (lmspider@scansoft.com)",
+ "LNSpiderguy",
+ "LocalBot/1.0 ( http://www.localbot.co.uk/)",
+ "LocalcomBot/1.2.x ( http://www.local.com/bot.htm)",
+ "Lockstep Spider/1.0",
+ "Look.com",
+ "Lovel as 1.0 ( +http://www.everatom.com)",
+ "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU; http://www.lti.cs.cmu.edu; changkuk at cmu dot edu)",
+ "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch; http://www.lemurproject.org; mhoy@cs.cmu.edu)",
+ "lwp-trivial/1.32",
+ "lwp-trivial/1.34",
+ "lwp-trivial/1.34",
+ "LWP::Simple/5.22",
+ "LWP::Simple/5.36",
+ "LWP::Simple/5.48",
+ "LWP::Simple/5.50",
+ "LWP::Simple/5.51",
+ "LWP::Simple/5.53",
+ "LWP::Simple/5.63",
+ "LWP::Simple/5.803",
+ "Lycos_Spider_(modspider)",
+ "Lycos_Spider_(T-Rex)",
+ "Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (human-guided@lerly.net)",
+ "Mac Finder 1.0.xx",
+ "Mackster( http://www.ukwizz.com )",
+ "Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; http://mahiti.com ; mahiti.com)",
+ "Mail.Ru/1.0",
+ "mailto:webcraft@bea.com",
+ "mammoth/1.0 ( http://www.sli-systems.com/)",
+ "MantraAgent",
+ "MapoftheInternet.com ( http://MapoftheInternet.com)",
+ "Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)",
+ "Marketwave Hit List",
+ "Martini",
+ "MARTINI",
+ "Marvin v0.3",
+ "MaSagool/1.0 (MaSagool; http://sagool.jp/; info@sagool.jp)",
+ "MasterSeek",
+ "Mata Hari/2.00 ",
+ "Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)",
+ "maxomobot/dev-20051201 (maxomo; http://67.102.134.34:4047/MAXOMO/MAXOMObot.html; maxomobot@maxomo.com)",
+ "MDbot/1.0 (+http://www.megadownload.net/bot.html)",
+ "MediaCrawler-1.0 (Experimental)",
+ "Mediapartners-Google/2.1 ( http://www.googlebot.com/bot.html)",
+ "MediaSearch/0.1",
+ "MegaSheep v1.0 (www.searchuk.com internet sheep)",
+ "Megite2.0 (http://www.megite.com)",
+ "Mercator-1.x",
+ "Mercator-2.0",
+ "Mercator-Scrub-1.1",
+ "Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine; http://www.metaeuro.com; crawler at metaeuro dot com)",
+ "MetaGer-LinkChecker",
+ "MetagerBot/0.8-dev (MetagerBot; http://metager.de; )",
+ "MetaGer_PreChecker0.1",
+ "Metaspinner/0.01 (Metaspinner; http://www.meta-spinner.de/; support@meta-spinner.de/)",
+ "metatagsdir/0.7 (+http://metatagsdir.com/directory/)",
+ "MFC Foundation Class Library 4.0",
+ "MicroBaz",
+ "Microsoft Small Business Indexer",
+ "Microsoft URL Control - 6.00.8xxx",
+ "MicrosoftPrototypeCrawler (How's my crawling? mailto:newbiecrawler@hotmail.com)",
+ "Missauga Locate 1.0.0",
+ "Missigua Locator 1.9",
+ "Missouri College Browse",
+ "Misterbot-Nutch/0.7.1 (Misterbot-Nutch; http://www.misterbot.fr; admin@misterbot.fr)",
+ "Miva (AlgoFeedback@miva.com)",
+ "Mizzu Labs 2.2",
+ "MJ12bot/vx.x.x (http://majestic12.co.uk/bot.php?+)",
+ "MJ12bot/vx.x.x (http://www.majestic12.co.uk/projects/dsearch/mj12bot.php)",
+ "MJBot (SEO assessment)",
+ "MLBot (www.metadatalabs.com)",
+ "MnogoSearch/3.2.xx",
+ "Mo College 1.9",
+ "moget/x.x (moget@goo.ne.jp)",
+ "mogimogi/1.0",
+ "MojeekBot/0.x (archi; http://www.mojeek.com/bot.html)",
+ "Morris - Mixcat Crawler ( http://mixcat.com)",
+ "Mouse-House/7.4 (spider_monkey spider info at www.mobrien.com/sm.shtml)",
+ "mozDex/0.xx-dev (mozDex; http://www.mozdex.com/en/bot.html; spider@mozdex.com)",
+ "Mozilla (Mozilla@somewhere.com)",
+ "Mozilla 4.0(compatible; BotSeer/1.0; +http://botseer.ist.psu.edu)",
+ "Mozilla/2.0 (compatible; Ask Jeeves)",
+ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)",
+ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml) ",
+ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://sp.ask.com/docs/about/tech_crawling.html)",
+ "Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)",
+ "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)",
+ "Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)",
+ "Mozilla/3.0 (compatible; Fluffy the spider; http://www.searchhippo.com/; info@searchhippo.com)",
+ "Mozilla/3.0 (compatible; Indy Library)",
+ "Mozilla/3.0 (compatible; MuscatFerret/1.5.4; claude@euroferret.com)",
+ "Mozilla/3.0 (compatible; MuscatFerret/1.5; olly@muscat.co.uk)",
+ "Mozilla/3.0 (compatible; MuscatFerret/1.6.x; claude@euroferret.com)",
+ "Mozilla/3.0 (compatible; scan4mail (advanced version) http://www.peterspages.net/?scan4mail)",
+ "Mozilla/3.0 (compatible; ScollSpider; http://www.webwobot.com)",
+ "Mozilla/3.0 (compatible; Webinator-DEV01.home.iprospect.com/2.56)",
+ "Mozilla/3.0 (compatible; Webinator-indexer.cyberalert.com/2.56)",
+ "Mozilla/3.0 (INGRID/3.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
+ "Mozilla/3.0 (Slurp.so/Goo; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Mozilla/3.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Mozilla/3.0 (Vagabondo/1.1 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/3.0 (Vagabondo/1.x MT; webagent@wise-guys.nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
+ "Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/3.01 (Compatible; Links2Go Similarity Engine)",
+ "Mozilla/4.0",
+ "Mozilla/4.0 (agadine3.0) www.agada.de",
+ "Mozilla/4.0 (compatible: AstraSpider V.2.1 : astrafind.com)",
+ "Mozilla/4.0 (compatible; Vagabondo/2.2; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/4.0 (compatible; Vagabondo/4.0Beta; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)",
+ "Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)",
+ "Mozilla/4.0 (compatible; ChristCrawler.com ChristCrawler@ChristCENTRAL.com)",
+ "Mozilla/4.0 (compatible; crawlx, crawler@trd.overture.com)",
+ "Mozilla/4.0 (compatible; DAUMOA-video; +http://ws.daum.net/aboutkr.html)",
+ "Mozilla/4.0 (compatible; FastCrawler3 support-fastcrawler3@fast.no)",
+ "Mozilla/4.0 (compatible; FDSE robot)",
+ "Mozilla/4.0 (compatible; GPU p2p crawler http://gpu.sourceforge.net/search_engine.php)",
+ "Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with http://grub.org)",
+ "Mozilla/4.0 (compatible; grub-client-2.x)",
+ "Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)",
+ "Mozilla/4.0 (compatible; MSIE 4.01; Vonna.com b o t)",
+ "Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer 19.123.2.733) OrangeBot-Mobile 2008.0 (mobilesearch.support@orange-ftgroup.com)",
+ "Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive",
+ "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (samualt9@bigfoot.com)",
+ "Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience: http://www.netnose.com)",
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5",
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)",
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6",
+ "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent",
+ "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com)",
+ "Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com/; http://www.galaxy.com/info/crawler.html)",
+ "Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)",
+ "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)",
+ "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)",
+ "Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +http://www.singingfish.com/help/spider.html; webmaster@singingfish.com); SpiderThread Revision: 3.10",
+ "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]",
+ "Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +http://www.targetgroups.net/TargetSeek.html)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st; http://tuezilla.de/t_st-odp-entries-agent.html)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test; http://tuezilla.de/test-odp-links-agent.html)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0 http://www.cs.washington.edu/research/networking/websys/)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)",
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)",
+ "Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +http://ws.daum.net/aboutkr.html)",
+ "Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)",
+ "Mozilla/4.0 (compatible; NaverBot/1.0; http://help.naver.com/delete_main.asp)",
+ "Mozilla/4.0 (compatible; SpeedySpider; www.entireweb.com)",
+ "Mozilla/4.0 (compatible; www.galaxy.com)",
+ "Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)",
+ "Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)",
+ "Mozilla/4.0 (JemmaTheTourist;http://www.activtourist.com)",
+ "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
+ "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
+ "Mozilla/4.0 (Mozilla; http://www.mozilla.org/docs/en/bot.html; master@mozilla.com)",
+ "Mozilla/4.0 (Sleek Spider/1.2)",
+ "Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot; http://www.furl.net; wn.furlbot@looksmart.net)",
+ "Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
+ "Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
+ "Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
+ "Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
+ "Mozilla/4.0 efp@gmx.net",
+ "Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)",
+ "Mozilla/4.0(compatible; Zealbot 1.0)",
+ "Mozilla/4.04 (compatible; Dulance bot; +http://www.dulance.com/bot.jsp)",
+ "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8",
+ "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2",
+ "Mozilla/4.6 [en] (http://www.cnet.com/)",
+ "Mozilla/4.7",
+ "Mozilla/4.7 (compatible; http://eidetica.com/spider)",
+ "Mozilla/4.7 (compatible; Intelliseek; http://www.intelliseek.com)",
+ "Mozilla/4.7 (compatible; Whizbang)",
+ "Mozilla/4.7 (compatible; WhizBang; http://www.whizbang.com/crawler)",
+ "Mozilla/4.7 [en](BecomeBot@exava.com)",
+ "Mozilla/4.7 [en](Exabot@exava.com)",
+ "Mozilla/4.72 [en] (BACS http://www.ba.be)",
+ "Mozilla/5.0",
+ "Mozilla/5.0 (+http://www.eurekster.com/mammoth) Mammoth/0.1",
+ "Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1",
+ "Mozilla/5.0 (Clustered-Search-Bot/1.0; support@clush.com; http://www.clush.com/)",
+ "Mozilla/5.0 (compatible; +http://www.evri.com/evrinid)",
+ "Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/spider.html;) Gecko/2008032620",
+ "Mozilla/5.0 (compatible; Abonti/0.8 - http://www.abonti.com)",
+ "Mozilla/5.0 (compatible; aiHitBot/1.0; +http://www.aihit.com/)",
+ "Mozilla/5.0 (compatible; AnsearchBot/1.x; +http://www.ansearch.com.au/)",
+ "Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +http://www.loc.gov/minerva/crawl.html)",
+ "Mozilla/5.0 (compatible; archive.org_bot/1.13.1x http://crawler.archive.org)",
+ "Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 http://crawler.archive.org) Hurricane Katrina",
+ "Mozilla/5.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)",
+ "Mozilla/5.0 (compatible; BecomeBot/1.23; http://www.become.com/webmasters.html)",
+ "Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible; http://www.become.com/webmasters.html)",
+ "Mozilla/5.0 (compatible; BecomeBot/2.0beta; http://www.become.com/webmasters.html)",
+ "Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible; http://www.become.com/site_owners.html)",
+ "Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +http://www.become.co.jp/site_owners.html)",
+ "Mozilla/5.0 (compatible; BlogRefsBot/0.1; http://www.blogrefs.com/about/bloggers)",
+ "Mozilla/5.0 (compatible; Bot; +http://pressemitteilung.ws/spamfilter",
+ "Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +http://www.buzzrankingbot.com/)",
+ "Mozilla/5.0 (compatible; Charlotte/1.0b; charlotte@betaspider.com)",
+ "Mozilla/5.0 (compatible; Charlotte/1.0b; http://www.searchme.com/support/)",
+ "Mozilla/5.0 (compatible; Crawling jpeg; http://www.yama.info.waseda.ac.jp)",
+ "Mozilla/5.0 (compatible; de/1.13.2 +http://www.de.com)",
+ "Mozilla/5.0 (compatible; Diffbot/0.1; +http://www.diffbot.com)",
+ "Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +http://www.dnsdigger.com)",
+ "Mozilla/5.0 (compatible; DNS-Digger/1.0; +http://www.dnsdigger.com)",
+ "Mozilla/5.0 (compatible; EARTHCOM.info/2.01; http://www.earthcom.info)",
+ "Mozilla/5.0 (compatible; EARTHCOM/2.2; +http://enter4u.eu)",
+ "Mozilla/5.0 (compatible; Exabot Test/3.0; +http://www.exabot.com/go/robot)",
+ "Mozilla/5.0 (compatible; FatBot 2.0; http://www.thefind.com/main/CrawlerFAQs.fhtml)",
+ "Mozilla/5.0 (compatible; Galbot/1.0; +http://www.galbot.com/bot.html)",
+ "mozilla/5.0 (compatible; genevabot http://www.healthdash.com)",
+ "Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
+ "mozilla/5.0 (compatible; heritrix/1.0.4 http://innovationblog.com)",
+ "Mozilla/5.0 (compatible; heritrix/1.10.2 +http://i.stanford.edu/)",
+ "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://newstin.com/)",
+ "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com)",
+ "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com) [email:paul@page-store.com]",
+ "mozilla/5.0 (compatible; heritrix/1.3.0 http://archive.crawler.org)",
+ "Mozilla/5.0 (compatible; heritrix/1.4.0 +http://www.chepi.net)",
+ "Mozilla/5.0 (compatible; heritrix/1.4t http://www.truveo.com/)",
+ "Mozilla/5.0 (compatible; heritrix/1.5.0 http://www.l3s.de/~kohlschuetter/projects/crawling/)",
+ "Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921 http://pandora.nla.gov.au/crawl.html)",
+ "Mozilla/5.0 (compatible; heritrix/1.6.0 http://www.worio.com/)",
+ "Mozilla/5.0 (compatible; heritrix/1.7.0 +http://www.greaterera.com/)",
+ "Mozilla/5.0 (compatible; heritrix/1.x.x +http://www.accelobot.com)",
+ "Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +http://www.aol.com)",
+ "Mozilla/5.0 (compatible; Hermit Search. Com; +http://www.hermitsearch.com)",
+ "Mozilla/5.0 (compatible; HyperixScoop/1.3; +http://www.hyperix.com)",
+ "Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)",
+ "Mozilla/5.0 (compatible; InterseekWeb/3.x)",
+ "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)",
+ "Mozilla/5.0 (compatible; LemSpider 0.1)",
+ "Mozilla/5.0 (compatible; MojeekBot/2.0; http://www.mojeek.com/bot.html)",
+ "Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network; crawler_admin@podtech.net)",
+ "Mozilla/5.0 (compatible; OnetSzukaj/5.0; http://szukaj.onet.pl)",
+ "Mozilla/5.0 (compatible; PalmeraBot; http://www.links24h.com/help/palmera) Version 0.001",
+ "Mozilla/5.0 (compatible; pogodak.ba/3.x)",
+ "Mozilla/5.0 (compatible; Pogodak.hr/3.1)",
+ "Mozilla/5.0 (compatible; PWeBot/3.1; http://www.programacionweb.net/robot.php)",
+ "Mozilla/5.0 (compatible; Quantcastbot/1.0; www.quantcast.com)",
+ "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)",
+ "Mozilla/5.0 (compatible; Scrubby/2.2; http://www.scrubtheweb.com/)",
+ "Mozilla/5.0 (compatible; ShunixBot/1.x.x +http://www.shunix.com/robot.htm)",
+ "Mozilla/5.0 (compatible; ShunixBot/1.x; http://www.shunix.com/bot.htm)",
+ "Mozilla/5.0 (compatible; SkreemRBot +http://skreemr.com)",
+ "Mozilla/5.0 (compatible; SummizeBot +http://www.summize.com)",
+ "Mozilla/5.0 (compatible; Synoobot/0.9; http://www.synoo.com/search/bot.html)",
+ "Mozilla/5.0 (compatible; Theophrastus/x.x; http://users.cs.cf.ac.uk/N.A.Smith/theophrastus.php)",
+ "Mozilla/5.0 (compatible; TridentSpider/3.1)",
+ "Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
+ "Mozilla/5.0 (compatible; Webduniabot/1.0; +http://search.webdunia.com/bot.aspx)",
+ "Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com)",
+ "Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;http://www.wowlemmings.com/kathune.html)",
+ "Mozilla/5.0 (compatible; Yahoo! DE Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+ "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
+ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+ "Mozilla/5.0 (compatible; Yoono; http://www.yoono.com/)",
+ "Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; )",
+ "Mozilla/5.0 (compatible; Zenbot/1.3; +http://zen.co.za/webmasters/)",
+ "Mozilla/5.0 (compatible; zermelo +http://www.powerset.com) [email:paul@page-store.com,crawl@powerset.com]",
+ "Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +http://www.archive-it.org)",
+ "Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +http://pandora.nla.gov.au/crawl.html)",
+ "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
+ "Mozilla/5.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)",
+ "Mozilla/5.0 (Version: xxxx Type:xx)",
+ "Mozilla/5.0 (wgao@genieknows.com)",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact: crawler_at_dataalchemy.com",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
+ "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health",
+ "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@healthline.com",
+ "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/20021130",
+ "Mozilla/5.0 URL-Spider",
+ "Mozilla/5.0 usww.com-Spider-for-w8.net",
+ "Mozilla/5.0 wgao@genieknows.com",
+ "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
+ "MQbot metaquerier.cs.uiuc.edu/crawler",
+ "MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://falcon.cs.uiuc.edu; mqbot@cs.uiuc.edu)",
+ "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)",
+ "msnbot-Products/1.0 (+http://search.msn.com/msnbot.htm)",
+ "MSNBOT/0.xx (http://search.msn.com/msnbot.htm)",
+ "msnbot/x.xx ( http://search.msn.com/msnbot.htm)",
+ "MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)",
+ "MSNPTC/1.0",
+ "MSRBOT (http://research.microsoft.com/research/sv/msrbot)",
+ "multicrawler ( http://sw.deri.org/2006/04/multicrawler/robots.html)",
+ "MultiText/0.1",
+ "MusicWalker2.0 ( http://www.somusical.com)",
+ "MVAClient",
+ "Mylinea.com Crawler 2.0",
+ "Naamah 1.0.1/Blogbot (http://blogbot.de/)",
+ "Naamah 1.0a/Blogbot (http://blogbot.de/)",
+ "NABOT/5.0",
+ "nabot_1.0",
+ "NameOfAgent (CMS Spider)",
+ "NASA Search 1.0",
+ "NationalDirectory-WebSpider/1.3",
+ "NationalDirectoryAddURL/1.0",
+ "NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
+ "NaverBot_dloader/1.5",
+ "NavissoBot",
+ "NavissoBot/1.7 (+http://navisso.com/)",
+ "NCSA Beta 1 (http://vias.ncsa.uiuc.edu/viasarchivinginformation.html)",
+ "Nebullabot/2.2 (http://bot.nebulla.info)",
+ "NEC Research Agent -- compuman at research.nj.nec.com",
+ "Net-Seekr Bot/Net-Seekr Bot V1 (http://www.net-seekr.com)",
+ "NetinfoBot/1.0 (http://netinfo.bg/netinfobot.html)",
+ "NetLookout/2.24",
+ "Netluchs/0.8-dev ( ; http://www.netluchs.de/; ___don't___spam_me_@netluchs.de)",
+ "NetNoseCrawler/v1.0",
+ "Netprospector JavaCrawler",
+ "NetResearchServer(http://www.look.com)",
+ "NetResearchServer/x.x(loopimprovements.com/robot.html)",
+ "NetSeer/Nutch-0.9 (NetSeer Crawler; http://www.netseer.com; crawler@netseer.com)",
+ "NetSprint -- 2.0",
+ "NetWhatCrawler/0.06-dev (NetWhatCrawler from NetWhat.com; http://www.netwhat.com; support@netwhat.com)",
+ "NetZippy",
+ "NextGenSearchBot 1 (for information visit http://www.eliyon.com/NextGenSearchBot)",
+ "NextopiaBOT (+http://www.nextopia.com) distributed crawler client beta v0.x",
+ "NG-Search/0.90 (NG-SearchBot; http://www.ng-search.com; )",
+ "NG/1.0",
+ "NG/4.0.1229",
+ "NITLE Blog Spider/0.01",
+ "Noago Spider",
+ "Nokia-WAPToolkit/1.2 googlebot(at)googlebot.com",
+ "Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2; http://help.yahoo.com/help/us/ysearch/crawling/crawling-01.html)",
+ "NokodoBot/1.x (+http://nokodo.com/bot.htm)",
+ "Norbert the Spider(Burf.com)",
+ "noxtrumbot/1.0 (crawler@noxtrum.com)",
+ "noyona_0_1",
+ "NP/0.1 (NP; http://www.nameprotect.com; npbot@nameprotect.com)",
+ "NPBot (http://www.nameprotect.com/botinfo.html)",
+ "NPBot-1/2.0",
+ "Nsauditor/1.x",
+ "nsyght.com/Nutch-1.0-dev (nsyght.com; Nsyght.com)",
+ "nsyght.com/Nutch-x.x (nsyght.com; search.nsyght.com)",
+ "nttdirectory_robot/0.9 (super-robot@super.navi.ocn.ne.jp)",
+ "nuSearch Spider <a href='http://www.nusearch.com'>www.nusearch.com</a> (compatible; MSIE 4.01)",
+ "NuSearch Spider (compatible; MSIE 6.0)",
+ "NuSearch Spider www.nusearch.com",
+ "Nutch",
+ "Nutch crawler/Nutch-0.9 (picapage.com; admin@picapage.com)",
+ "Nutch/Nutch-0.9 (Eurobot; http://www.ayell.eu )",
+ "NutchCVS/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
+ "NutchCVS/0.7.1 (Nutch running at UW; http://www.nutch.org/docs/en/bot.html; sycrawl@cs.washington.edu)",
+ "NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.; http://lucene.apache.org/nutch/bot.html; ec2test at lucene.com)",
+ "NutchOrg/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
+ "nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)",
+ "NutchVinegarCrawl/Nutch-0.8.1 (Vinegar; http://www.cs.washington.edu; eytanadar at gmail dot com)",
+ "obidos-bot (just looking for books.)",
+ "ObjectsSearch/0.01-dev (ObjectsSearch;http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
+ "ObjectsSearch/0.0x (ObjectsSearch; http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
+ "oBot ((compatible;Win32))",
+ "Ocelli/1.x (http://www.globalspec.com/Ocelli)",
+ "Octora Beta - www.octora.com",
+ "Octora Beta Bot - www.octora.com",
+ "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Internet CategorizerOmniExplorer http://www.omni-explorer.com/ car & shopping search (64.62.175.xxx)",
+ "OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Job Crawler",
+ "OmniExplorer_Bot/1.1x (+http://www.omni-explorer.com) Torrent Crawler",
+ "OmniExplorer_Bot/x.xx (+http://www.omni-explorer.com) WorldIndexer",
+ "Onet.pl SA- http://szukaj.onet.pl",
+ "OntoSpider/1.0 libwww-perl/5.65",
+ "OOZBOT/0.20 ( http://www.setooz.com/oozbot.html ; agentname at setooz dot_com )",
+ "OpenAcoon v4.0.x (www.openacoon.de)",
+ "Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
+ "Openfind data gatherer- Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
+ "Openfind Robot/1.1A2",
+ "OpenISearch/1.x (www.openisearch.com)",
+ "OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)",
+ "OpenTextSiteCrawler/2.9.2",
+ "OpenWebSpider/0.x.x (http://www.openwebspider.org)",
+ "OpenWebSpider/x",
+ "OpidooBOT (larbin2.6.3@unspecified.mail)",
+ "Oracle Ultra Search",
+ "OrangeSpider",
+ "Orbiter/T-2.0 (+http://www.dailyorbit.com/bot.htm)",
+ "Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
+ "ozelot/2.7.3 (Search engine indexer; www.flying-cat.de/ozelot; ozelot@flying-cat.de)",
+ "PADLibrary Spider",
+ "PageBitesHyperBot/600 (http://www.pagebites.com/)",
+ "Pagebull http://www.pagebull.com/",
+ "page_verifier (http://www.securecomputing.com/goto/pv)",
+ "parallelContextFocusCrawler1.1parallelContextFocusCrawler1.1",
+ "ParaSite/1.0b (http://www.ianett.com/parasite/)",
+ "Patwebbot (http://www.herz-power.de/technik.html)",
+ "PBrowse 1.4b",
+ "pd02_1.0.0 pd02_1.0.0@dzimi@post.sk",
+ "PEERbot www.peerbot.com",
+ "PEval 1.4b",
+ "PicoSearch/1.0",
+ "Piffany_Web_Scraper_v0.x",
+ "Piffany_Web_Spider_v0.x",
+ "pipeLiner/0.3a (PipeLine Spider;http://www.pipeline-search.com/webmaster.html; webmaster'at'pipeline-search.com)",
+ "pipeLiner/0.xx (PipeLine Spider; http://www.pipeline-search.com/webmaster.html)",
+ "Pita",
+ "PJspider/3.0 (pjspider@portaljuice.com; http://www.portaljuice.com)",
+ "PlagiarBot/1.0",
+ "PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://www.pluck.com; 1 subscribers)",
+ "Pluggd/Nutch-0.9 (automated crawler http://www.pluggd.com;support at pluggd dot com)",
+ "Poirot",
+ "polybot 1.0 (http://cis.poly.edu/polybot/)",
+ "Pompos/1.x http://dir.com/pompos.html",
+ "Pompos/1.x pompos@iliad.fr",
+ "Popdexter/1.0",
+ "Port Huron Labs",
+ "PortalBSpider/2.0 (spider@portalb.com)",
+ "potbot 1.0",
+ "PRCrawler/Nutch-0.9 (data mining development project; crawler@projectrialto.com)",
+ "PrivacyFinder Cache Bot v1.0",
+ "PrivacyFinder/1.1",
+ "Production Bot 0116B",
+ "Production Bot 2016B",
+ "Production Bot DOT 3016B",
+ "Program Shareware 1.0.2",
+ "Project XP5 [2.03.07-111203]",
+ "PROve AnswerBot 4.0",
+ "ProWebGuide Link Checker (http://www.prowebguide.com)",
+ "psbot/0.1 (+http://www.picsearch.com/bot.html)",
+ "PSurf15a 11",
+ "PSurf15a 51",
+ "PSurf15a VA",
+ "psycheclone",
+ "PubCrawl (pubcrawl.stanford.edu)",
+ "pulseBot (pulse Web Miner)",
+ "PWeBot/1.2 Inspector (http://www.programacionweb.net/robot.php)",
+ "PycURL",
+ "Python-urllib/1.1x",
+ "Python-urllib/2.0a1",
+ "Qango.com Web Directory (http://www.qango.com/)",
+ "QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility; http://nlp.uned.es/qeavis",
+ "QPCreep Test Rig ( We are not indexing- just testing )",
+ "QuepasaCreep ( crawler@quepasacorp.com )",
+ "QuepasaCreep v0.9.1x",
+ "QueryN Metasearch",
+ "QweeryBot/3.01 ( http://qweerybot.qweery.nl)",
+ "Qweery_robot.txt_CheckBot/3.01 (http://qweerybot.qweery.com)",
+ "R6_CommentReader_(www.radian6.com/crawler)",
+ "R6_FeedFetcher_(www.radian6.com/crawler)",
+ "rabaz (rabaz at gigabaz dot com)",
+ "RaBot/1.0 Agent-admin/phortse@hanmail.net",
+ "ramBot xtreme x.x",
+ "RAMPyBot - www.giveRAMP.com/0.1 (RAMPyBot - www.giveRAMP.com; http://www.giveramp.com/bot.html; support@giveRAMP.com)",
+ "RAMPyBot/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
+ "Rankivabot/3.2 (www.rankiva.com; 3.2; vzmxikn)",
+ "Rational SiteCheck (Windows NT)",
+ "Reaper [2.03.10-031204] (http://www.sitesearch.ca/reaper/)",
+ "Reaper/2.0x (+http://www.sitesearch.ca/reaper)",
+ "RedCarpet/1.2 (http://www.redcarpet-inc.com/robots.html)",
+ "RedCell/0.1 (InfoSec Search Bot (Coming Soon); http://www.telegenetic.net/bot.html; lhall@telegenetic.net)",
+ "RedCell/0.1 (RedCell; telegenetic.net/bot.html; lhall_at_telegenetic.net)",
+ "RedKernel WWW-Spider 2/0 (+http://www-spider.redkernel-softwares.com/)",
+ "rico/0.1",
+ "RixBot (http://babelserver.org/rix)",
+ "RoboCrawl (http://www.canadiancontent.net)",
+ "RoboCrawl (www.canadiancontent.net)",
+ "RoboPal (http://www.findpal.com/)",
+ "Robot/www.pj-search.com",
+ "Robot: NutchCrawler- Owner: wdavies@acm.org",
+ "Robot@SuperSnooper.Com",
+ "Robozilla/1.0",
+ "Rotondo/3.1 libwww/5.3.1",
+ "RRC (crawler_admin@bigfoot.com)",
+ "RSSMicro.com RSS/Atom Feed Robot",
+ "RSurf15a 41",
+ "RSurf15a 51",
+ "RSurf15a 81",
+ "RufusBot (Rufus Web Miner; http://64.124.122.252/feedback.html)",
+ "RufusBot (Rufus Web Miner; http://www.webaroo.com/rooSiteOwners.html)",
+ "sait/Nutch-0.9 (SAIT Research; http://www.samsung.com)",
+ "SandCrawler - Compatibility Testing",
+ "SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
+ "SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
+ "savvybot/0.2",
+ "SBIder/0.7 (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
+ "SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
+ "ScanWeb",
+ "ScholarUniverse/0.8 (Nutch;+http://scholaruniverse.com/bot.jsp; fetch-agent@scholaruniverse.com)",
+ "schwarzmann.biz-Spider_for_paddel.org+(http://www.innerprise.net/usp-spider.asp)",
+ "ScollSpider/2.0 (+http://www.webwobot.com/ScollSpider.php)",
+ "Scooter-3.0.EU",
+ "Scooter-3.0.FS",
+ "Scooter-3.0.HD",
+ "Scooter-3.0.VNS",
+ "Scooter-3.0QI",
+ "Scooter-3.2",
+ "Scooter-3.2.BT",
+ "Scooter-3.2.DIL",
+ "Scooter-3.2.EX",
+ "Scooter-3.2.JT",
+ "Scooter-3.2.NIV",
+ "Scooter-3.2.SF0",
+ "Scooter-3.2.snippet",
+ "Scooter-3.3dev",
+ "Scooter-ARS-1.1",
+ "Scooter-ARS-1.1-ih",
+ "scooter-venus-3.0.vns",
+ "Scooter-W3-1.0",
+ "Scooter-W3.1.2",
+ "Scooter/1.0",
+ "Scooter/1.0 scooter@pa.dec.com",
+ "Scooter/1.1 (custom)",
+ "Scooter/2.0 G.R.A.B. V1.1.0",
+ "Scooter/2.0 G.R.A.B. X2.0",
+ "Scooter/3.3",
+ "Scooter/3.3.QA.pczukor",
+ "Scooter/3.3.vscooter",
+ "Scooter/3.3_SF",
+ "Scooter2_Mercator_x-x.0",
+ "Scooter_bh0-3.0.3",
+ "Scooter_trk3-3.0.3",
+ "ScoutAbout",
+ "ScoutAnt/0.1; +http://www.ant.com/what_is_ant.com/",
+ "scoutmaster",
+ "Scrubby/2.x (http://www.scrubtheweb.com/)",
+ "Scrubby/3.0 (+http://www.scrubtheweb.com/help/technology.html)",
+ "Search+",
+ "Search-Engine-Studio",
+ "search.ch V1.4",
+ "search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)",
+ "Search/1.0 (http://www.innerprise.net/es-spider.asp)",
+ "searchbot admin@google.com",
+ "SearchByUsa/2 (SearchByUsa; http://www.SearchByUsa.com/bot.html; info@SearchByUsa.com)",
+ "SearchdayBot",
+ "SearchExpress Spider0.99",
+ "SearchGuild/DMOZ/Experiment (searchguild@gmail.com)",
+ "SearchGuild_DMOZ_Experiment (chris@searchguild.com)",
+ "Searchit-Now Robot/2.2 (+http://www.searchit-now.co.uk)",
+ "Searchmee! Spider v0.98a",
+ "SearchSight/2.0 (http://SearchSight.com/)",
+ "SearchSpider.com/1.1",
+ "Searchspider/1.2 (SearchSpider; http://www.searchspider.com; webmaster@searchspider.com)",
+ "SearchTone2.0 - IDEARE",
+ "Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3",
+ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.0 (XDF)",
+ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2",
+ "Seeker.lookseek.com",
+ "Semager/1.1 (http://www.semager.de/blog/semager-bots/)",
+ "Semager/1.x (http://www.semager.de)",
+ "Sensis Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
+ "Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
+ "SeznamBot/1.0",
+ "SeznamBot/1.0 (+http://fulltext.seznam.cz/)",
+ "SeznamBot/2.0-test (+http://fulltext.sblog.cz/)",
+ "ShablastBot 1.0",
+ "Shim Crawler",
+ "Shim-Crawler(Mozilla-compatible; http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp)",
+ "ShopWiki/1.0 ( +http://www.shopwiki.com/)",
+ "ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot)",
+ "Shoula.com Crawler 2.0",
+ "SietsCrawler/1.1 (+http://www.siets.biz)",
+ "Sigram/Nutch-1.0-dev (Test agent for Nutch development; http://www.sigram.com/bot.html; bot at sigram dot com)",
+ "Siigle Orumcex v.001 Turkey (http://www.siigle.com)",
+ "silk/1.0",
+ "silk/1.0 (+http://www.slider.com/silk.htm)/3.7",
+ "Sirketcebot/v.01 (http://www.sirketce.com/bot.html)",
+ "SiteSpider +(http://www.SiteSpider.com/)",
+ "SiteTruth.com site rating system",
+ "SiteXpert",
+ "Skampy/0.9.x (http://www.skaffe.com/skampy-info.html)",
+ "Skimpy/0.x (http://www.skaffe.com/skampy-info.html)",
+ "Skywalker/0.1 (Skywalker; anonymous; anonymous)",
+ "Slarp/0.1",
+ "Slider_Search_v1-de",
+ "Slurp/2.0 (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Slurp/2.0-KiteWeekly (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
+ "Slurpy Verifier/1.0",
+ "SlySearch (slysearch@slysearch.com)",
+ "SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html",
+ "SlySearch/1.x http://www.slysearch.com",
+ "smartwit.com",
+ "SmiffyDCMetaSpider/1.0",
+ "snap.com beta crawler v0",
+ "Snapbot/1.0",
+ "Snapbot/1.0 (Snap Shots, +http://www.snap.com)",
+ "SnykeBot/0.6 (http://www.snyke.com)",
+ "SocSciBot ()",
+ "SoftHypermarketFileCheckBot/1.0+(+http://www.softhypermaket.com)",
+ "sogou develop spider",
+ "Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+ "sogou spider",
+ "Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
+ "sohu agent",
+ "sohu-search",
+ "Sosospider+(+http://help.soso.com/webspider.htm)",
+ "speedfind ramBot xtreme 8.1",
+ "Speedy Spider (Beta/x.x; speedy@entireweb.com)",
+ "Speedy Spider (Entireweb; Beta/1.0; http://www.entireweb.com/about/search_tech/speedyspider/)",
+ "Speedy_Spider (http://www.entireweb.com)",
+ "Sphere Scout&v4.0 - scout at sphere dot com",
+ "Sphider",
+ "Spida/0.1",
+ "Spider-Sleek/2.0 (+http://search-info.com/linktous.html)",
+ "spider.batsch.com",
+ "spider.yellopet.com - www.yellopet.com",
+ "Spider/maxbot.com admin@maxbot.com",
+ "SpiderKU/0.x",
+ "SpiderMan",
+ "SpiderMonkey/7.0x (SpiderMonkey.ca info at http://spidermonkey.ca/sm.shtml)",
+ "Spinne/2.0",
+ "Spinne/2.0 med",
+ "Spinne/2.0 med_AH",
+ "Spock Crawler (http://www.spock.com/crawler)",
+ "sportsuchmaschine.de-Robot (Version: 1.02- powered by www.sportsuchmaschine.de)",
+ "sproose/0.1-alpha (sproose crawler; http://www.sproose.com/bot.html; crawler@sproose.com)",
+ "Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)",
+ "Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)",
+ "SSurf15a 11 ",
+ "StackRambler/x.x ",
+ "stat statcrawler@gmail.com",
+ "Steeler/1.x (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
+ "Steeler/3.3 (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
+ "Strategic Board Bot (+http://www.strategicboard.com)",
+ "Strategic Board Bot (+http://www.strategicboard.com)",
+ "Submission Spider at surfsafely.com",
+ "suchbaer.de",
+ "suchbaer.de (CrawlerAgent v0.103)",
+ "suchbot",
+ "Suchknecht.at-Robot",
+ "suchpadbot/1.0 (+http://www.suchpad.de)",
+ "SurferF3 1/0",
+ "suzuran",
+ "Swooglebot/2.0. (+http://swoogle.umbc.edu/swooglebot.htm)",
+ "SWSBot-Images/1.2 http://www.smartwaresoft.com/swsbot12.html",
+ "SygolBot http://www.sygol.net",
+ "SynoBot",
+ "Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler",
+ "Szukacz/1.x",
+ "Szukacz/1.x (robot; www.szukacz.pl/jakdzialarobot.html; szukacz@proszynski.pl)",
+ "tags2dir.com/0.8 (+http://tags2dir.com/directory/)",
+ "Tagword (http://tagword.com/dmoz_survey.php)",
+ "Talkro Web-Shot/1.0 (E-mail: webshot@daumsoft.com- Home: http://222.122.15.190/webshot)",
+ "TCDBOT/Nutch-0.8 (PhD student research;http://www.tcd.ie; mcgettrs at t c d dot IE)",
+ "TECOMAC-Crawler/0.x",
+ "Tecomi Bot (http://www.tecomi.com/bot.htm)",
+ "Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.; http://www.netseer.com/crawler.html; crawler@netseer.com)",
+ "Teoma MP",
+ "teomaagent crawler-admin@teoma.com",
+ "teomaagent1 [crawler-admin@teoma.com]",
+ "teoma_agent1",
+ "Teradex Mapper; mapper@teradex.com; http://www.teradex.com",
+ "terraminds-bot/1.0 (support@terraminds.de)",
+ "TerrawizBot/1.0 (+http://www.terrawiz.com/bot.html)",
+ "Test spider",
+ "TestCrawler/Nutch-0.9 (Testing Crawler for Research ; http://balihoo.com/index.aspx; tgautier at balihoo dot com)",
+ "TheRarestParser/0.2a (http://therarestwords.com/)",
+ "TheSuBot/0.1 (www.thesubot.de)",
+ "thumbshots-de-Bot (Version: 1.02- powered by www.thumbshots.de)",
+ "timboBot/0.9 http://www.breakingblogs.com/timbo_bot.html",
+ "TinEye/1.1 (http://tineye.com/crawler.html)",
+ "tivraSpider/1.0 (crawler@tivra.com)",
+ "TJG/Spider",
+ "Tkensaku/x.x(http://www.tkensaku.com/q.html)",
+ "Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing; http://www.topodia.com/; support@topodia.com)",
+ "Toutatis x-xx.x (hoppa.com)",
+ "Toutatis x.x (hoppa.com)",
+ "Toutatis x.x-x",
+ "traazibot/testengine (+http://www.traazi.de)",
+ "Trampelpfad-Spider",
+ "Trampelpfad-Spider-v0.1",
+ "TSurf15a 11",
+ "Tumblr/1.0 RSS syndication (+http://www.tumblr.com/) (support@tumblr.com)",
+ "TurnitinBot/x.x (http://www.turnitin.com/robot/crawlerinfo.html)",
+ "Turnpike Emporium LinkChecker/0.1",
+ "TutorGig/1.5 (+http://www.tutorgig.com/crawler)",
+ "Tutorial Crawler 1.4 (http://www.tutorgig.com/crawler)",
+ "Twiceler www.cuill.com/robots.html",
+ "Twiceler-0.9 http://www.cuill.com/twiceler/robot.html",
+ "Tycoon Agent/Nutch-1.0-dev",
+ "TygoBot",
+ "TygoProwler",
+ "UIowaCrawler/1.0",
+ "UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler; http://www.ukwizz.com/)",
+ "Ultraseek",
+ "Under the Rainbow 2.2",
+ "UofTDB_experiment (leehyun@cs.toronto.edu)",
+ "updated/0.1-alpha (updated crawler; http://www.updated.com; crawler@updated.com)",
+ "updated/0.1beta (updated.com; http://www.updated.com; crawler@updated.om)",
+ "Uptimebot",
+ "UptimeBot(www.uptimebot.com)",
+ "URL Spider Pro/x.xx (innerprise.net)",
+ "urlfan-bot/1.0; +http://www.urlfan.com/site/bot/350.html",
+ "URL_Spider_Pro/x.x",
+ "URL_Spider_Pro/x.x+(http://www.innerprise.net/usp-spider.asp)",
+ "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
+ "User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 - www.SKIZZLE.com)",
+ "USyd-NLP-Spider (http://www.it.usyd.edu.au/~vinci/bot.html)",
+ "VadixBot",
+ "Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)/1.0 Profile",
+ "Vagabondo/1.x MT (webagent@wise-guys.nl)",
+ "Vagabondo/2.0 MT",
+ "Vagabondo/2.0 MT (webagent at wise-guys dot nl)",
+ "Vagabondo/2.0 MT (webagent@NOSPAMwise-guys.nl)",
+ "Vagabondo/3.0 (webagent at wise-guys dot nl)",
+ "Vakes/0.01 (Vakes; http://www.vakes.com/; search@vakes.com)",
+ "versus 0.2 (+http://versus.integis.ch)",
+ "versus crawler eda.baykan@epfl.ch",
+ "VeryGoodSearch.com.DaddyLongLegs",
+ "verzamelgids.nl - Networking4all Bot/x.x",
+ "Verzamelgids/2.2 (http://www.verzamelgids.nl)",
+ "Vespa Crawler",
+ "VisBot/2.0 (Visvo.com Crawler; http://www.visvo.com/bot.html; bot@visvo.com)",
+ "Vision Research Lab image spider at vision.ece.ucsb.edu",
+ "VMBot/0.x.x (VMBot; http://www.VerticalMatch.com/; vmbot@tradedot.com)",
+ "Vortex/2.2 (+http://marty.anstey.ca/robots/vortex/)",
+ "voyager-hc/1.0",
+ "voyager/1.0",
+ "voyager/2.0 (http://www.kosmix.com/html/crawler.html)",
+ "VSE/1.0 (testcrawler@hotmail.com)",
+ "VSE/1.0 (testcrawler@vivisimo.com)",
+ "vspider",
+ "vspider/3.x",
+ "VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler; http://vwbot.cs.uiuc.edu;+vwbot@cs.uiuc.edu",
+ "W3SiteSearch Crawler_v1.1 http://www.w3sitesearch.de",
+ "wadaino.jp-crawler 0.2 (http://wadaino.jp/)",
+ "Wavefire/0.8-dev (Wavefire; http://www.wavefire.com; info@wavefire.com)",
+ "Waypath development crawler - info at waypath dot com",
+ "Waypath Scout v2.x - info at waypath dot com",
+ "Web Snooper",
+ "web2express.org/Nutch-0.9-dev (leveled playing field; http://web2express.org/; info at web2express.org)",
+ "WebAlta Crawler/1.2.1 (http://www.webalta.ru/bot.html)",
+ "WebarooBot (Webaroo Bot; http://64.124.122.252/feedback.html)",
+ "WebarooBot (Webaroo Bot; http://www.webaroo.com/rooSiteOwners.html)",
+ "webbandit/4.xx.0",
+ "Webclipping.com",
+ "WebCompass 2.0",
+ "WebCorp/1.0",
+ "webcrawl.net",
+ "WebFindBot(http://www.web-find.com)",
+ "Webglimpse 2.xx.x (http://webglimpse.net)",
+ "Weblog Attitude Diffusion 1.0",
+ "webmeasurement-bot, http://rvs.informatik.uni-leipzig.de",
+ "WebRankSpider/1.37 (+http://ulm191.server4you.de/crawler/)",
+ "WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)",
+ "WebSearchBench WebCrawler v0.1(Experimental)",
+ "WebsiteWorth v1.0",
+ "Webspinne/1.0 webmaster@webspinne.de",
+ "Websquash.com (Add url robot)",
+ "WebStat/1.0 (Unix; beta; 20040314)",
+ "Webster v0.3 ( http://webster.healeys.net/ )",
+ "WebVac (webmaster@pita.stanford.edu)",
+ "Webverzeichnis.de - Telefon: 01908 / 26005",
+ "WebVulnCrawl.unknown/1.0 libwww-perl/5.803",
+ "Wells Search II",
+ "WEP Search 00",
+ "WFARC",
+ "whatUseek_winona/3.0",
+ "WhizBang! Lab",
+ "Willow Internet Crawler by Twotrees V2.1",
+ "WinHTTP Example/1.0",
+ "WinkBot/0.06 (Wink.com search engine web crawler; http://www.wink.com/Wink:WinkBot; winkbot@wink.com)",
+ "WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,aromano@cli.di.unipi.it)",
+ "WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)",
+ "WISEbot/1.0 (WISEbot@koreawisenut.com; http://wisebot.koreawisenut.com)",
+ "worio heritrix bot (+http://worio.com/)",
+ "woriobot ( http://www.worio.com/)",
+ "WorldLight",
+ "Wotbox/alpha0.6 (bot@wotbox.com; http://www.wotbox.com)",
+ "Wotbox/alpha0.x.x (bot@wotbox.com; http://www.wotbox.com) Java/1.4.1_02",
+ "WSB WebCrawler V1.0 (Beta)- cl@cs.uni-dortmund.de",
+ "WSB, http://websearchbench.cs.uni-dortmund.de",
+ "wume_crawler/1.1 (http://wume.cse.lehigh.edu/~xiq204/crawler/)",
+ "Wwlib/Linux",
+ "www.arianna.it",
+ "WWWeasel Robot v1.00 (http://wwweasel.de)",
+ "wwwster/1.x (Beta- mailto:gue@cis.uni-muenchen.de)",
+ "X-Crawler ",
+ "xirq/0.1-beta (xirq; http://www.xirq.com; xirq@xirq.com)",
+ "xyro_(xcrawler@cosmos.inria.fr)",
+ "Y!J-BSC/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "Y!J-SRD/1.0",
+ "Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
+ "yacy (www.yacy.net; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)",
+ "yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de) yacy.net",
+ "Yahoo Pipes 1.0",
+ "Yahoo! Mindset",
+ "Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
+ "Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)",
+ "Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash inc.com ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)",
+ "Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)",
+ "Yahoo-Test/4.0",
+ "Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler",
+ "YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://publisher.yahoo.com/rssguide)",
+ "YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)",
+ "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
+ "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/yahooseeker.html)",
+ "YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
+ "YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
+ "YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ;cafekelsa-dev-webmaster@yahoo-inc.com )",
+ "Yandex/1.01.001 (compatible; Win16; I)",
+ "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)",
+ "yarienavoir.net/0.2",
+ "Yeti",
+ "Yeti/0.01 (nhn/1noon, yetibot@naver.com, check robots.txt daily and follows it)",
+ "Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)",
+ "yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)",
+ "YodaoBot/1.0 (http://www.yodao.com/help/webmaster/spider/; )",
+ "yoofind/yoofind-0.1-dev (yoono webcrawler; http://www.yoono.com ; MyEmail)",
+ "yoogliFetchAgent/0.1",
+ "yoono/1.0 web-crawler/1.0",
+ "YottaCars_Bot/4.12 (+http://www.yottacars.com) Car Search Engine ",
+ "YottaShopping_Bot/4.12 (+http://www.yottashopping.com) Shopping Search Engine",
+ "Zao-Crawler",
+ "Zao-Crawler 0.2b",
+ "Zao/0.1 (http://www.kototoi.org/zao/)",
+ "ZBot/1.00 (icaulfield@zeus.com)",
+ "Zearchit",
+ "ZeBot_lseek.net (bot@ze.bz)",
+ "ZeBot_www.ze.bz (ze.bz@hotmail.com)",
+ "zedzo.digest/0.1 (http://www.zedzo.com/)",
+ "zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+http://www.powerset.com) [email:crawl@powerset.com,email:paul@page-store.com]",
+ "zerxbot/Version 0.6 libwww-perl/5.79",
+ "Zeus ThemeSite Viewer Webster Pro V2.9 Win32",
+ "Zeus xxxxx Webster Pro V2.9 Win32",
+ "Zeusbot/0.07 (Ulysseek's web-crawling robot; http://www.zeusbot.com; agent@zeusbot.com)",
+ "ZipppBot/0.xx (ZipppBot; http://www.zippp.net; webmaster@zippp.net)",
+ "ZIPPPCVS/0.xx (ZipppBot/.xx;http://www.zippp.net; webmaster@zippp.net)",
+ "Zippy v2.0 - Zippyfinder.com",
+ "ZoomSpider - wrensoft.com",
+ "zspider/0.9-dev http://feedback.redkolibri.com/",
+ "ZyBorg/1.0 (ZyBorg@WISEnut.com; http://www.WISEnut.com)"]
+ end
+end
22 app/models/impressionist/impressionable.rb
@@ -0,0 +1,22 @@
+module Impressionist
+ module Impressionable
+ def is_impressionable
+ has_many :impressions, :as=>:impressionable
+ include InstanceMethods
+ end
+
+ module InstanceMethods
+ def impressionable?
+ true
+ end
+
+ def impression_count(start_date=nil,end_date=Time.now)
+ start_date.blank? ? impressions.all.size : impressions.where("created_at>=? and created_at<=?",start_date,end_date).all.size
+ end
+
+ def unique_impression_count(start_date=nil,end_date=Time.now)
+ start_date.blank? ? impressions.group(:ip_address).all.size : impressions.where("created_at>=? and created_at<=?",start_date,end_date).group(:ip_address).all.size
+ end
+ end
+ end
+end
2  config/routes.rb
@@ -0,0 +1,2 @@
+# No routes are defined in this file; the draw block is empty.
+Rails.application.routes.draw {}
109 impressionist.gemspec
@@ -0,0 +1,109 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
+# -*- encoding: utf-8 -*-
+
+# Gem specification for impressionist: metadata, the list of packaged files,
+# and the gem dependencies.
+Gem::Specification.new do |s|
+ s.name = %q{impressionist}
+ s.version = "0.1.0"
+
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+ s.authors = ["cowboycoded"]
+ s.date = %q{2011-02-03}
+ s.description = %q{Log impressions from controller actions or from a model}
+ s.email = %q{john.mcaliley@gmail.com}
+ s.extra_rdoc_files = [
+ "LICENSE.txt",
+ "README.rdoc"
+ ]
+ # Explicit list of every file shipped in the packaged gem.
+ s.files = [
+ ".document",
+ "Gemfile",
+ "LICENSE.txt",
+ "README.rdoc",
+ "Rakefile",
+ "VERSION",
+ "app/controllers/impressionist_controller.rb",
+ "app/models/impression.rb",
+ "app/models/impressionist/bots.rb",
+ "app/models/impressionist/impressionable.rb",
+ "config/routes.rb",
+ "impressionist.gemspec",
+ "lib/generators/impressionist/impressionist_generator.rb",
+ "lib/generators/impressionist/templates/create_impressions_table.rb",
+ "lib/impressionist.rb",
+ "lib/impressionist/bots.rb",
+ "lib/impressionist/engine.rb",
+ "lib/impressionist/railties/tasks.rake"
+ ]
+ s.homepage = %q{http://github.com/johnmcaliley/impressionist}
+ s.licenses = ["MIT"]
+ s.require_paths = ["lib"]
+ s.rubygems_version = %q{1.3.7}
+ s.summary = %q{Easy way to log impressions}
+
+ # Dependency declarations. The same 18 gems with the same version
+ # requirements appear in all three branches below; only the method used to
+ # register them differs.
+ # NOTE(review): nokogiri is required by lib/impressionist/bots.rb but is not
+ # listed in any branch, and httpclient is only declared as a development
+ # dependency in the first branch - confirm whether runtime dependencies
+ # should be declared.
+ if s.respond_to? :specification_version then
+ # NOTE(review): current_version is assigned but never read in this file.
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+ s.specification_version = 3
+
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
+ # RubyGems >= 1.2.0: register everything as development dependencies.
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
+ s.add_development_dependency(%q<jeweler>, ["~> 1.5.1"])
+ s.add_development_dependency(%q<rcov>, [">= 0"])
+ s.add_development_dependency(%q<ZenTest>, [">= 0"])
+ s.add_development_dependency(%q<autotest>, [">= 0"])
+ s.add_development_dependency(%q<systemu>, [">= 0"])
+ s.add_development_dependency(%q<rspec>, [">= 0"])
+ s.add_development_dependency(%q<rspec-rails>, [">= 0"])
+ s.add_development_dependency(%q<mongrel>, ["= 1.2.0.pre2"])
+ s.add_development_dependency(%q<capybara>, [">= 0"])
+ s.add_development_dependency(%q<database_cleaner>, [">= 0"])
+ s.add_development_dependency(%q<cucumber-rails>, [">= 0"])
+ s.add_development_dependency(%q<cucumber>, [">= 0"])
+ s.add_development_dependency(%q<spork>, [">= 0"])
+ s.add_development_dependency(%q<launchy>, [">= 0"])
+ s.add_development_dependency(%q<autotest-notification>, [">= 0"])
+ s.add_development_dependency(%q<httpclient>, [">= 0"])
+ else
+ # RubyGems older than 1.2.0: the same gems are registered with
+ # add_dependency instead of add_development_dependency.
+ s.add_dependency(%q<shoulda>, [">= 0"])
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+ s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
+ s.add_dependency(%q<rcov>, [">= 0"])
+ s.add_dependency(%q<ZenTest>, [">= 0"])
+ s.add_dependency(%q<autotest>, [">= 0"])
+ s.add_dependency(%q<systemu>, [">= 0"])
+ s.add_dependency(%q<rspec>, [">= 0"])
+ s.add_dependency(%q<rspec-rails>, [">= 0"])
+ s.add_dependency(%q<mongrel>, ["= 1.2.0.pre2"])
+ s.add_dependency(%q<capybara>, [">= 0"])
+ s.add_dependency(%q<database_cleaner>, [">= 0"])
+ s.add_dependency(%q<cucumber-rails>, [">= 0"])
+ s.add_dependency(%q<cucumber>, [">= 0"])
+ s.add_dependency(%q<spork>, [">= 0"])
+ s.add_dependency(%q<launchy>, [">= 0"])
+ s.add_dependency(%q<autotest-notification>, [">= 0"])
+ s.add_dependency(%q<httpclient>, [">= 0"])
+ end
+ else
+ # The spec object does not respond to specification_version: same list,
+ # again registered with add_dependency.
+ s.add_dependency(%q<shoulda>, [">= 0"])
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
+ s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
+ s.add_dependency(%q<rcov>, [">= 0"])
+ s.add_dependency(%q<ZenTest>, [">= 0"])
+ s.add_dependency(%q<autotest>, [">= 0"])
+ s.add_dependency(%q<systemu>, [">= 0"])
+ s.add_dependency(%q<rspec>, [">= 0"])
+ s.add_dependency(%q<rspec-rails>, [">= 0"])
+ s.add_dependency(%q<mongrel>, ["= 1.2.0.pre2"])
+ s.add_dependency(%q<capybara>, [">= 0"])
+ s.add_dependency(%q<database_cleaner>, [">= 0"])
+ s.add_dependency(%q<cucumber-rails>, [">= 0"])
+ s.add_dependency(%q<cucumber>, [">= 0"])
+ s.add_dependency(%q<spork>, [">= 0"])
+ s.add_dependency(%q<launchy>, [">= 0"])
+ s.add_dependency(%q<autotest-notification>, [">= 0"])
+ s.add_dependency(%q<httpclient>, [">= 0"])
+ end
+end
+
20 lib/generators/impressionist/impressionist_generator.rb
@@ -0,0 +1,20 @@
+require 'rails/generators'
+require 'rails/generators/migration'
+
+class ImpressionistGenerator < Rails::Generators::Base
+ include Rails::Generators::Migration
+ source_root File.join(File.dirname(__FILE__), 'templates')
+
+ def self.next_migration_number(dirname)
+ sleep 1
+ if ActiveRecord::Base.timestamped_migrations
+ Time.now.utc.strftime("%Y%m%d%H%M%S")
+ else
+ "%.3d" % (current_migration_number(dirname) + 1)
+ end
+ end
+
+ def create_migration_file
+ migration_template 'create_impressions_table.rb', 'db/migrate/create_impressions_table.rb'
+ end
+end
20 lib/generators/impressionist/templates/create_impressions_table.rb
@@ -0,0 +1,20 @@
+class CreateImpressionsTable < ActiveRecord::Migration
+ def self.up
+ create_table :impressions, :force => true do |t|
+ t.string :impressionable_type
+ t.integer :impressionable_id
+ t.integer :user_id
+ t.string :controller_name
+ t.string :action_name
+ t.string :view_name
+ t.string :request_hash
+ t.string :ip_address
+ t.string :message
+ t.timestamps
+ end
+ end
+
+ def self.down
+ drop_table :impressions
+ end
+end
5 lib/impressionist.rb
@@ -0,0 +1,5 @@
+PATH = File.dirname(__FILE__) + "/impressionist"
+require "#{PATH}/engine.rb"
+
+module Impressionist
+end
18 lib/impressionist/bots.rb
@@ -0,0 +1,18 @@
+require 'httpclient'
+require 'nokogiri'
+
+module Impressionist
+ module Bots
+ LIST_URL = "http://www.user-agents.org/allagents.xml"
+ def self.consume
+ response = HTTPClient.new.get_content(LIST_URL)
+ doc = Nokogiri::XML(response)
+ list = []
+ doc.xpath('//user-agent').each do |agent|
+ type = agent.xpath("Type").text
+ list << agent.xpath("String").text.gsub("&lt;","<") if ["R","S"].include?(type) #gsub hack for badly formatted data
+ end
+ list
+ end
+ end
+end
18 lib/impressionist/engine.rb
@@ -0,0 +1,18 @@
+require "impressionist"
+require "rails"
+
+module Impressionist
+ class Engine < Rails::Engine
+
+ initializer 'impressionist.controller' do
+ ActiveSupport.on_load(:action_controller) do
+ include ImpressionistController::InstanceMethods
+ extend ImpressionistController::ClassMethods
+ end
+ end
+
+ initializer 'impressionist.extend_ar' do
+ ActiveRecord::Base.extend Impressionist::Impressionable
+ end
+ end
+end
0  lib/impressionist/railties/tasks.rake
No changes.
4 test_app/.gitignore
@@ -0,0 +1,4 @@
+.bundle
+db/*.sqlite3
+log/*.log
+tmp/**/*
1  test_app/.rspec
@@ -0,0 +1 @@
+--colour
24 test_app/Gemfile
@@ -0,0 +1,24 @@
+source 'http://rubygems.org'
+
+gem 'rails', '3.0.3'
+gem 'sqlite3-ruby', :require => 'sqlite3'
+gem 'impressionist', :path=>"/rails_plugins/mine/impressionist"
+
+if ENV['MY_BUNDLE_ENV'] == "dev"
+ group :development do
+ gem 'ZenTest'
+ gem 'autotest'
+ gem 'systemu'
+ gem "rspec"
+ gem "rspec-rails"
+ gem "mongrel", "1.2.0.pre2"
+ gem 'capybara'
+ gem 'database_cleaner'
+ gem 'cucumber-rails'
+ gem 'cucumber'
+ gem 'spork'
+ gem 'launchy'
+ gem 'autotest-notification'
+ end
+end
+
256 test_app/README
@@ -0,0 +1,256 @@
+== Welcome to Rails
+
+Rails is a web-application framework that includes everything needed to create
+database-backed web applications according to the Model-View-Controller pattern.
+
+This pattern splits the view (also called the presentation) into "dumb"
+templates that are primarily responsible for inserting pre-built data in between
+HTML tags. The model contains the "smart" domain objects (such as Account,
+Product, Person, Post) that hold all the business logic and know how to
+persist themselves to a database. The controller handles the incoming requests
+(such as Save New Account, Update Product, Show Post) by manipulating the model
+and directing data to the view.
+
+In Rails, the model is handled by what's called an object-relational mapping
+layer entitled Active Record. This layer allows you to present the data from
+database rows as objects and embellish these data objects with business logic
+methods. You can read more about Active Record in
+link:files/vendor/rails/activerecord/README.html.
+
+The controller and view are handled by the Action Pack, which handles both
+layers by its two parts: Action View and Action Controller. These two layers
+are bundled in a single package due to their heavy interdependence. This is
+unlike the relationship between the Active Record and Action Pack that is much
+more separate. Each of these packages can be used independently outside of
+Rails. You can read more about Action Pack in
+link:files/vendor/rails/actionpack/README.html.
+
+
+== Getting Started
+
+1. At the command prompt, create a new Rails application:
+ <tt>rails new myapp</tt> (where <tt>myapp</tt> is the application name)
+
+2. Change directory to <tt>myapp</tt> and start the web server:
+ <tt>cd myapp; rails server</tt> (run with --help for options)
+
+3. Go to http://localhost:3000/ and you'll see:
+ "Welcome aboard: You're riding Ruby on Rails!"
+
+4. Follow the guidelines to start developing your application. You can find
+the following resources handy:
+
+* The Getting Started Guide: http://guides.rubyonrails.org/getting_started.html
+* Ruby on Rails Tutorial Book: http://www.railstutorial.org/
+
+
+== Debugging Rails
+
+Sometimes your application goes wrong. Fortunately there are a lot of tools that
+will help you debug it and get it back on the rails.
+
+The first area to check is the application log files. Have "tail -f" commands
+running on the server.log and development.log. Rails will automatically display
+debugging and runtime information to these files. Debugging info will also be
+shown in the browser on requests from 127.0.0.1.
+
+You can also log your own messages directly into the log file from your code
+using the Ruby logger class from inside your controllers. Example:
+
+ class WeblogController < ActionController::Base
+ def destroy
+ @weblog = Weblog.find(params[:id])
+ @weblog.destroy
+ logger.info("#{Time.now} Destroyed Weblog ID ##{@weblog.id}!")
+ end
+ end
+
+The result will be a message in your log file along the lines of:
+
+ Mon Oct 08 14:22:29 +1000 2007 Destroyed Weblog ID #1!
+
+More information on how to use the logger is at http://www.ruby-doc.org/core/
+
+Also, Ruby documentation can be found at http://www.ruby-lang.org/. There are
+several books available online as well:
+
+* Programming Ruby: http://www.ruby-doc.org/docs/ProgrammingRuby/ (Pickaxe)
+* Learn to Program: http://pine.fm/LearnToProgram/ (a beginner's guide)
+
+These two books will bring you up to speed on the Ruby language and also on
+programming in general.
+
+
+== Debugger
+
+Debugger support is available through the debugger command when you start your
+Mongrel or WEBrick server with --debugger. This means that you can break out of
+execution at any point in the code, investigate and change the model, and then,
+resume execution! You need to install ruby-debug to run the server in debugging
+mode. With gems, use <tt>sudo gem install ruby-debug</tt>. Example:
+
+ class WeblogController < ActionController::Base
+ def index
+ @posts = Post.find(:all)
+ debugger
+ end
+ end
+
+So the controller will accept the action, run the first line, then present you
+with an IRB prompt in the server window. Here you can do things like:
+
+ >> @posts.inspect
+ => "[#<Post:0x14a6be8
+ @attributes={"title"=>nil, "body"=>nil, "id"=>"1"}>,
+ #<Post:0x14a6620
+ @attributes={"title"=>"Rails", "body"=>"Only ten..", "id"=>"2"}>]"
+ >> @posts.first.title = "hello from a debugger"
+ => "hello from a debugger"
+
+...and even better, you can examine how your runtime objects actually work:
+
+ >> f = @posts.first
+ => #<Post:0x13630c4 @attributes={"title"=>nil, "body"=>nil, "id"=>"1"}>
+ >> f.
+ Display all 152 possibilities? (y or n)
+
+Finally, when you're ready to resume execution, you can enter "cont".
+
+
+== Console
+
+The console is a Ruby shell, which allows you to interact with your
+application's domain model. Here you'll have all parts of the application
+configured, just like it is when the application is running. You can inspect
+domain models, change values, and save to the database. Starting the script
+without arguments will launch it in the development environment.
+
+To start the console, run <tt>rails console</tt> from the application
+directory.
+
+Options:
+
+* Passing the <tt>-s, --sandbox</tt> argument will roll back any modifications
+ made to the database.
+* Passing an environment name as an argument will load the corresponding
+ environment. Example: <tt>rails console production</tt>.
+
+To reload your controllers and models after launching the console run
+<tt>reload!</tt>
+
+More information about irb can be found at:
+link:http://www.rubycentral.com/pickaxe/irb.html
+
+
+== dbconsole
+
+You can go to the command line of your database directly through <tt>rails
+dbconsole</tt>. You would be connected to the database with the credentials
+defined in database.yml. Starting the script without arguments will connect you
+to the development database. Passing an argument will connect you to a different
+database, like <tt>rails dbconsole production</tt>. Currently works for MySQL,
+PostgreSQL and SQLite 3.
+
+== Description of Contents
+
+The default directory structure of a generated Ruby on Rails application:
+
+ |-- app
+ | |-- controllers
+ | |-- helpers
+ | |-- mailers
+ | |-- models
+ | `-- views
+ | `-- layouts
+ |-- config
+ | |-- environments