Permalink
Browse files

super pimped environment with automatic clustering by any timebase an…

…d polynomial regression with derivatives
  • Loading branch information...
1 parent d365f54 commit b74fcfdcdf607c69c3b31638d3807661f43d1a49 Thomas Fankhauser committed Apr 19, 2012
View
BIN .DS_Store
Binary file not shown.
View
@@ -5,6 +5,7 @@
require 'eventmachine'
require 'em-http'
require 'hirb'
+require 'matrix'
require "bigbench/float_extensions"
require "bigbench/version"
@@ -18,6 +19,7 @@
require "bigbench/store"
require "bigbench/bot"
require "bigbench/output"
+require "bigbench/post_processor/environment"
require "bigbench/post_processor"
require "bigbench/post_processor/statistics"
@@ -118,31 +118,6 @@ def message
end
end
- # The environment in which the post processors are evaluated. Every method defined here is available in
- # the post_process block and run! methods of the predefined post processors
- module Environment
-
- # Iterates through every tracking and returns a tracking hash of the following form:
- #
- # {
- # :elapsed => 2.502132,
- # :start => 1333986292.1755981,
- # :stop => 1333986293.618884,
- # :duration => 1443,
- # :benchmark => "index page",
- # :url => "http://www.google.de/",
- # :path => "/",
- # :method => "get",
- # :status => 200
- # }
- #
- def each_tracking
- File.open(BigBench.config.output, "r+") do |file|
- file.each_line { |line| yield JSON.parse(line).inject({}){|memo,(k,v)| memo[k.to_sym] = v; memo} unless line.blank? }
- end
- end
- end
-
end
# To setup a post processor simply do this:
@@ -0,0 +1,213 @@
+module BigBench
+ module PostProcessor
+
+ # The environment in which the post processors are evaluated. Every method defined here is available in
+ # the post_process block and run! methods of the predefined post processors
+ module Environment
+
+ @trackings = nil
+ @cluster = nil
+ @regressions = nil
+
+ # Iterates through every tracking and returns a tracking hash of the following form:
+ #
+ # {
+ # :elapsed => 2.502132,
+ # :start => 1333986292.1755981,
+ # :stop => 1333986293.618884,
+ # :duration => 1443,
+ # :benchmark => "index page",
+ # :url => "http://www.google.de/",
+ # :path => "/",
+ # :method => "get",
+ # :status => 200
+ # }
+ #
+ def each_tracking
+ File.open(BigBench.config.output, "r+") do |file|
+ file.each_line { |line| yield JSON.parse(line).inject({}){|memo,(k,v)| memo[k.to_sym] = v; memo} unless line.blank? }
+ end
+ end
+
+ # Puts all tracking hashes into a huge array. Warning, this method call might take quite long!
+ # The results are cached, so you can call <tt>trackings</tt> in the future without any pain
+ def trackings
+ return @trackings unless @trackings.nil?
+ @trackings = []
+ each_tracking{ |tracking| @trackings << tracking }
+ @trackings
+ end
+
+ # Returns a clustered overview of all trackings. By default the trackings are clustered by second, but you
+ # can also specify any amount of seconds to group together. A cluster then has the following methods:
+ #
+ # # Duration was 120 seconds
+ # cluster.timesteps # => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,..., 120] (seconds)
+ # cluster.durations # => [50.3, 51.2, 40.3, 51.3, 50.3, 55.3, 52.3, 50.3, 51.3, 50.3, 54.3,..., 50.3] (average duration in milliseconds)
+ # cluster.requests # => [580, 569, 540, 524, 524, 525, 528, 520, 529, 527, 523,..., 524] (requests in that second)
+ #
+ # # Duration was 120 seconds = 2 minutes
+ # cluster(1.minute).timesteps # => [0, 1] (minutes)
+ # cluster(1.minute).durations # => [50.3, 51.2] (average duration in milliseconds)
+ # cluster(1.minute).requests # => [27836, 27684] (requests in that minute)
+ #
+ def cluster(timebase = 1.second)
+ @cluster = [] if @cluster.nil?
+ return @cluster[timebase] unless @cluster[timebase].nil?
+ @cluster[timebase] = Cluster.new(timebase)
+ end
+
+ # Returns a polynomial regression of a degree, a derivation and a timebase. Possible options are:
+ #
+ # [:degree] By default the degree is 1 which results in a linear regression. There's no limit to the degree.
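+ # [:derivation] Accepted and defaults to 0, but it does not influence the returned object. Pick the derivation on the result instead via <tt>derivation(0)</tt> (the regression itself) or <tt>derivation(1)</tt> (the first derivation). Higher derivations are not supported.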
+ # [:timebase] By default the cluster size is 1.second. Any timelimit can be added here, e.g. 1.minute
+ #
+ #
+ # # Return a linear regression for the durations, clustered by seconds
+ # polynomial_regression.durations.y
+ # polynomial_regression(:degree => 1, :timebase => 1.second).durations.y
+ # polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(0)
+ #
+ # # Return the first derivation of the linear regression for the durations, clustered by seconds
+ # polynomial_regression(:degree => 1).durations.derivation(1)
+ # polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(1)
+ #
+ # # Return a second degree polynomial regression for the durations, clustered by seconds
+ # polynomial_regression(:degree => 2).durations.derivation(0)
+ # polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(0)
+ #
+ # # Return the first derivation of the second degree polynomial regression for the durations, clustered by seconds
+ # polynomial_regression(:degree => 2).durations.derivation(1)
+ # polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(1)
+ #
+ def polynomial_regression(new_options = {})
+ options = { :degree => 1, :derivation => 0, :timebase => 1.second }.merge(new_options)
+ degree_and_timebase = [options[:degree], options[:timebase]]
+
+ @regressions = {} if @regressions.nil?
+ return @regressions[degree_and_timebase] unless @regressions[degree_and_timebase].nil?
+
+ @regressions[degree_and_timebase] ||= PolynomialCluster.new(options[:degree], options[:timebase])
+ end
+
+ # Adding the sum and average methods to the default array
+ class ::Array
+ def sum
+ reduce(:+).to_f
+ end
+
+ def average
+ sum / size.to_f
+ end
+ end
+
+ # Clusters the trackings in the specified timebase. By default everything is clustered by seconds.
+ class Cluster
+ include Environment
+
+ attr_accessor :timesteps
+ attr_accessor :durations
+ attr_accessor :requests
+
+ def initialize(timebase = 1.second)
+ @timesteps, @durations, @durations_array, @requests = [], [], [], []
+
+ # Cluster trackings
+ each_tracking do |tracking|
+ timestep = tracking[:elapsed].to_i / timebase
+
+ @timesteps[timestep] = timestep
+
+ @durations_array[timestep] = [] unless @durations_array[timestep].is_a?(Array)
+ @durations_array[timestep] << tracking[:duration]
+
+ @requests[timestep] = 0 if @requests[timestep].nil?
+ @requests[timestep] += 1
+ end
+
+ # Compute mean of durations
+ @timesteps.each { |timestep| @durations[timestep] = @durations_array[timestep].average }
+ end
+ end
+
+
+ # This class performs the actual regression for a specified degree and timebase. As x it returns the timebase
+ # values for the corresponding timebase - e.g. the seconds - and as y it returns the corresponding regression
+ # values. Additionally it offers the derivations of the regression through the <tt>derivation</tt> method.
+ class PolynomialRegression
+
+ # An array with the seconds in the timebase
+ #
+ # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ #
+ attr_reader :x
+ attr_reader :degree
+
+ # Returns the coefficients calculated for the regression and the degree like
+ #
+ # [3.428734, 1.176235]
+ #
+ attr_reader :coefficients
+
+ def initialize x, y, degree
+ @x, @degree, @derivations, @formulas = x, degree, [], []
+
+ # Perform regression
+ x_data = x.map { |xi| (0..degree).map { |pow| (xi**pow).to_f } }
+ mx = ::Matrix[*x_data]
+ my = ::Matrix.column_vector(y)
+
+ # Calculate coefficients
+ @coefficients = ((mx.t * mx).inv * mx.t * my).transpose.to_a[0]
+
+ # Setup functions that map the actual polynom
+ @derivations << lambda { |x| (0..@degree).to_a.inject(0) { |result, d| result + (@coefficients[d] * x**d) }}
+ @derivations << lambda { |x| (1..@degree).to_a.inject(0) { |result, d| result + (d * (@coefficients[d] * x**(d-1))) }}
+
+ # Store formulas for printing
+ @formulas << (0..@degree).to_a.map { |d| d == 0 ? @coefficients[d] : "#{@coefficients[d]}x^#{d}" }
+ @formulas << (1..@degree).to_a.map { |d| d == 1 ? @coefficients[d] : "#{d}*#{@coefficients[d]}x^#{d-1}" }
+ end
+
+ # Returns an array with the computed y-values for the corresponding derivation. The default derivation is the
+ # 0. derivation which is the original regression that is equal to the <tt>y</tt> method. The result looks like this:
+ #
+ # [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
+ #
+ def derivation(derivation = 0)
+ @x.map{ |x| @derivations[derivation].call(x) }
+ end
+
+ # Returns an array with the regression values like this:
+ #
+ # [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
+ #
+ def y
+ derivation(0)
+ end
+
+ # Returns the printed formula of this polynom
+ def formula(derivation = 0)
+ @formulas[derivation].join " + "
+ end
+
+ end
+
+
+ class PolynomialCluster
+ include Environment
+
+ attr_reader :durations
+ attr_reader :requests
+
+ def initialize degree = 0, timebase = 1.second
+ @durations = PolynomialRegression.new(cluster(timebase).timesteps, cluster(timebase).durations, degree)
+ @requests = PolynomialRegression.new(cluster(timebase).timesteps, cluster(timebase).requests, degree)
+ end
+
+ end
+
+ end
+ end
+end
@@ -0,0 +1,19 @@
+module BigBench
+ module PostProcessor
+ module Graphs
+ require 'gruff'
+
+ def self.run!(options)
+ puts "Seconds are: #{cluster.timesteps}"
+ puts "Durations are: #{cluster.durations}"
+ puts "Requests are: #{cluster.requests}"
+
+ puts "Minutes are: #{cluster(1.minute).timesteps}"
+ puts "Durations are: #{cluster(1.minute).durations}"
+ puts "Requests are: #{cluster(1.minute).requests}"
+
+ end
+
+ end
+ end
+end
@@ -0,0 +1,96 @@
+require_relative "../helpers"
+
+describe BigBench::PostProcessor::Environment do
+
+ before(:each) do
+ BigBench.config.duration = 2.minutes
+ BigBench.config.output = "spec/tests/sample_results_big.ljson"
+
+ class << self
+ include BigBench::PostProcessor::Environment
+ end
+
+ @total_trackings = 51_925
+ end
+
+ it "should allow to iterate over each tracking" do
+ all_trackings = 0
+ each_tracking do |tracking|
+ tracking.is_a?(Hash).should be_true
+ all_trackings += 1
+ end
+
+ all_trackings.should == @total_trackings
+ end
+
+ it "should collect a trackings array with all trackings" do
+ trackings.size.should == @total_trackings
+ trackings.each do |tracking|
+ tracking.is_a?(Hash).should be_true
+ end
+ end
+
+ it "should cluster to seconds by default" do
+ cluster.timesteps.size.should == 120
+ cluster(1.second).timesteps.size.should == 120
+ cluster(1.minute).timesteps.size.should == 2
+ cluster.durations.size.should == 120
+ cluster.requests.size.should == 120
+ cluster.durations.each{ |duration| duration.is_a?(Float) }
+ cluster.requests.each{ |requests| requests.is_a?(Integer) }
+ end
+
+ context "linear regression" do
+
+ it "should create durations with the default timebase" do
+ polynomial_regression.durations.x.size.should == 120
+ polynomial_regression.durations.y.size.should == 120
+ polynomial_regression.durations.derivation(0).size.should == 120
+ polynomial_regression.durations.derivation(1).size.should == 120
+ polynomial_regression.durations.degree.should == 1
+ polynomial_regression.durations.coefficients.should == [2.557222558648473, -0.008579877560828252]
+ polynomial_regression.durations.formula.should == "2.557222558648473 + -0.008579877560828252x^1"
+ polynomial_regression.durations.formula(1).should == "-0.008579877560828252"
+ end
+
+ it "should create requests with the default timebase" do
+ polynomial_regression.requests.x.size.should == 120
+ polynomial_regression.requests.y.size.should == 120
+ polynomial_regression.requests.derivation(0).size.should == 120
+ polynomial_regression.requests.derivation(1).size.should == 120
+
+ polynomial_regression.requests.degree.should == 1
+ polynomial_regression.requests.coefficients.should == [498.5461432506892, -1.1065178137370641]
+ polynomial_regression.requests.formula.should == "498.5461432506892 + -1.1065178137370641x^1"
+ polynomial_regression.requests.formula(1).should == "-1.1065178137370641"
+ end
+
+ it "should create durations with a custom timebase" do
+ polynomial_regression(:timebase => 1.minute).durations.x.size.should == 2
+ polynomial_regression(:timebase => 1.minute).durations.y.size.should == 2
+ polynomial_regression(:timebase => 1.minute).durations.derivation(0).size.should == 2
+ polynomial_regression(:timebase => 1.minute).durations.derivation(1).size.should == 2
+ polynomial_regression(:timebase => 1.minute).durations.degree.should == 1
+ polynomial_regression(:timebase => 1.minute).durations.coefficients.should == [1.5834530823394166, 0.3267548964060689]
+ polynomial_regression(:timebase => 1.minute).durations.formula.should == "1.5834530823394166 + 0.3267548964060689x^1"
+ polynomial_regression(:timebase => 1.minute).durations.formula(1).should == "0.3267548964060689"
+ end
+
+ end
+
+ context "quadratic regression" do
+
+ it "should create durations with the default timebase" do
+ polynomial_regression(:degree => 2).durations.x.size.should == 120
+ polynomial_regression(:degree => 2).durations.y.size.should == 120
+ polynomial_regression(:degree => 2).durations.derivation(0).size.should == 120
+ polynomial_regression(:degree => 2).durations.derivation(1).size.should == 120
+ polynomial_regression(:degree => 2).durations.degree.should == 2
+ polynomial_regression(:degree => 2).durations.coefficients.should == [3.702832696564527, -0.06683124050571232, 0.0004895072516376826]
+ polynomial_regression(:degree => 2).durations.formula.should == "3.702832696564527 + -0.06683124050571232x^1 + 0.0004895072516376826x^2"
+ polynomial_regression(:degree => 2).durations.formula(1).should == "-0.06683124050571232 + 2*0.0004895072516376826x^1"
+ end
+
+ end
+
+end
Oops, something went wrong.

0 comments on commit b74fcfd

Please sign in to comment.