Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

executable file 137 lines (117 sloc) 4.936 kB
#!/usr/bin/env ruby
require 'rubygems'
require 'rubberband'
require 'fileutils'
require 'configliere'
Settings.use :commandline, :env_var
#
# Estrus -- an alluringly primitive Elasticsearch stress-testing tool
#
# Example usage:
#
# ~/ics/backend/wonderdog/ruby/estrus.rb --queries=100 --output_dir=~/ics/backend/estrus_data
#
# Output:
#
# idx datetime secs msec/query hits shards_successful index nodename query_term
#
# Setup
#
# sudo apt-get install -y libcurl4-dev wamerican-large
# sudo gem install rubberband configliere
# ,tweet-2010q1
Settings.define :words_file, :default => "/usr/share/dict/words", :description => "Flat file with words to use"
Settings.define :offset_start, :default => 50_000, :description => "Where to start reading words", :type => Integer
Settings.define :offset_scale, :default => 100, :description => "How far in the file to range", :type => Integer
Settings.define :queries, :default => 10, :description => "Number of queries to run", :type => Integer
Settings.define :es_indexes, :default => 'tweet-2009q3pre,tweet-2009q4,tweet-2010q1,tweet-201004,tweet-201005,tweet-201005,tweet-201006,tweet-201007,tweet-201008,tweet-201009,tweet-201010,tweet-201011', :description => "Elasticsearch index to query against", :type => Array
Settings.define :output_dir, :default => nil, :description => "If given, the output is directed to a file named :output_dir/{date}/es-{datetime}-{comment}-{hostname}.tsv"
Settings.define :comment, :default => nil, :description => "If given, it is included in the filename"
Settings.define :host, :default => `hostname`.chomp, :description => "Host of ES query server"
Settings.define :port, :default => '9200', :description => "Port for ES query server"
Settings.resolve!
NODENAME = File.read('/etc/node_name').chomp rescue `hostname`.chomp
CLIENTS = Settings.es_indexes.inject([]){|clients, index| clients << [index, ElasticSearch.new("#{Settings.host}:#{Settings.port}", :index => index, :type => "tweet")] ; clients }
class StressTester
attr_accessor :started_at
def initialize
self.started_at = Time.now.utc
end
def words_file &block
File.open(Settings.words_file, &block)
end
def random_offset
Settings.offset_start + rand(1000)*Settings.offset_scale rescue nil
end
def output_file
return @output_file if @output_file
return $stdout if Settings.output_dir.blank?
datehr = started_at.strftime("%Y%m%d%H")
datetime = started_at.to_flat
output_filename = File.expand_path(File.join(Settings.output_dir, datehr,
["es", datetime, NODENAME, Settings.comment].compact.join('-')+".tsv"))
FileUtils.mkdir_p(File.dirname(output_filename))
@output_file = File.open(output_filename, "a")
end
def dump *args
output_file << args.join("\t")+"\n"
end
def each_word &block
words_file do |words_file|
random_offset.times{ words_file.readline }
loop do
word = words_file.readline.chomp rescue nil
break unless word
next if word =~ /\W/
yield word
end
end
end
end
class Time ; def to_flat() strftime("%Y%m%d%H%M%S"); end ; end
class Array ; def random() self[rand(length)] ; end ; end
tester = StressTester.new
n_queries_executed = 0
tester.each_word do |query_string|
index, client = CLIENTS.random
result = client.search "text:#{query_string}"
elapsed = Time.now.utc - tester.started_at
n_queries_executed += 1
tester.dump(
n_queries_executed, Time.now.utc.to_flat, "%7.1f"%elapsed,
"%7.1f"%( 1000 * elapsed / n_queries_executed.to_f ),
result.total_entries, result._shards['successful'],
index, NODENAME,
query_string)
$stderr.puts(n_queries_executed) if n_queries_executed % 20 == 0
break if n_queries_executed >= Settings.queries
end
# query_string = 'verizon'
# CLIENTS.each do |index,client|
# result = client.search "text:#{query_string}"
# elapsed = Time.now.utc - tester.started_at
# n_queries_executed += 1
# tester.dump(
# n_queries_executed, Time.now.utc.to_flat, "%7.1f"%elapsed,
# "%7.1f"%( 1000 * elapsed / n_queries_executed.to_f ),
# result.total_entries, result._shards['successful'],
# index, NODENAME,
# query_string)
#
# end
#
# TODO: monkeypatch rubberband to use keepalives:
#
# def connect!
# unless defined?(@@patron_session)
# @@patron_session = Patron::Session.new
# @session = @@patron_session
# @session.base_url = @server
# @session.timeout = @options[:timeout]
# @session.headers['User-Agent'] = 'ElasticSearch.rb v0.1'
# @session.headers['Connection'] = 'Keep-Alive'
# else
# @session = @@patron_session
# end
# @request_count = 1
# end
Jump to Line
Something went wrong with that request. Please try again.