Merge pull request #90 from kbrock/save_results

Save job (alternative to hold)
evanphx · Nov 25, 2018 · 0bb23ea · 0bb23ea
2 parents 8a9060d + dfe7d29
commit 0bb23ea
Show file tree

Hide file tree

Showing 9 changed files with 173 additions and 69 deletions.
diff --git a/README.md b/README.md
@@ -155,6 +155,11 @@ This will run only one benchmarks each time you run the command, storing
 results in the specified file. The file is deleted when all results have been
 gathered and the report is shown.
 
+Alternatively, if you prefer a different approach, the `save!` command is
+available. Examples for [hold!](examples/hold.rb) and [save!](examples/save.rb) can be found in
+the `examples/` directory.
+
+
 ### Multiple iterations
 
 In some cases you may want to run multiple iterations of the warmup and

diff --git a/examples/save.rb b/examples/save.rb
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+
+# example to explain save!
+# The save! feature expects to be run twice, generally with different Rubies.
+# save! can also be used to compare module changes which impact the run time.
+#
+# If you're comparing ruby versions, just use the version in the label
+#
+#     x.report("ruby #{RUBY_VERSION}") { 'Bruce'.inspect }
+#
+# Or use a hash
+# 
+#     x.report("version" => RUBY_VERSION, "method" => 'bruce') { 'Bruce'.inspect }
+#
+# RUN_1: SAVE_FILE='run1.out' ruby examples/save.rb
+# Warming up --------------------------------------
+#             without   172.168k i/100ms
+# Calculating -------------------------------------
+#             without      2.656M (± 3.3%) i/s -     13.429M in   5.062098s
+#
+# RUN_2: SAVE_FILE='run1.out' WITH_MODULE=true ruby examples/save.rb
+# Warming up --------------------------------------
+#                 with    92.087k i/100ms
+# Calculating -------------------------------------
+#                 with      1.158M (± 1.4%) i/s -      5.801M in   5.010084s
+#
+# Comparison:
+#              without:  2464721.3 i/s
+#                 with:  1158179.6 i/s - 2.13x  slower
+# CLEANUP: rm run1.out
+
+require 'benchmark/ips'
+
+Benchmark.ips do |x|
+  x.report(ENV['WITH_MODULE'] == 'true' ? 'with' : 'without') do
+    'Bruce'.inspect
+  end
+
+  if ENV['WITH_MODULE'] == 'true'
+    class String
+      def inspect
+        result = %w[Bruce Wayne is Batman]
+        result.join(' ')
+      end
+    end
+  end
+
+  x.save! ENV['SAVE_FILE'] if ENV['SAVE_FILE']
+  x.compare!
+end
diff --git a/lib/benchmark/compare.rb b/lib/benchmark/compare.rb
@@ -40,18 +40,14 @@ def compare(*entries)
 
       $stdout.puts "\nComparison:"
 
-      $stdout.printf "%20s: %10.1f i/s\n", best.label, best.stats.central_tendency
+      $stdout.printf "%20s: %10.1f i/s\n", best.label.to_s, best.stats.central_tendency
 
       sorted.each do |report|
         name = report.label.to_s
 
         $stdout.printf "%20s: %10.1f i/s - ", name, report.stats.central_tendency
 
-        best_low = best.stats.central_tendency - best.stats.error
-        report_high = report.stats.central_tendency + report.stats.error
-        overlaps = report_high > best_low
-
-        if overlaps
+        if report.stats.overlaps?(best.stats)
           $stdout.print "same-ish: difference falls within error"
         else
           slowdown, error = report.stats.slowdown(best.stats)

diff --git a/lib/benchmark/ips.rb b/lib/benchmark/ips.rb
@@ -1,6 +1,7 @@
 # encoding: utf-8
 require 'benchmark/timing'
 require 'benchmark/compare'
+require 'benchmark/ips/stats/stats_metric'
 require 'benchmark/ips/stats/sd'
 require 'benchmark/ips/stats/bootstrap'
 require 'benchmark/ips/report'
@@ -54,10 +55,17 @@ def ips(*args)
 
       yield job
 
-      job.load_held_results if job.hold? && job.held_results?
+      job.load_held_results
 
       job.run
 
+      if job.run_single? && job.all_results_have_been_run?
+        job.clear_held_results
+      else
+        job.save_held_results
+        puts '', 'Pausing here -- run Ruby again to measure the next benchmark...' if job.run_single?
+      end
+
       $stdout.sync = sync
       job.run_comparison
       job.generate_json

diff --git a/lib/benchmark/ips/job.rb b/lib/benchmark/ips/job.rb
@@ -58,6 +58,7 @@ def initialize opts={}
         @stdout = opts[:quiet] ? nil : StdoutReport.new
         @list = []
         @compare = false
+        @run_single = false
         @json_path = false
         @held_path = nil
         @held_results = nil
@@ -94,7 +95,7 @@ def compare?
         @compare
       end
 
-      # Set @compare to true.
+      # Run comparison utility.
       def compare!
         @compare = true
       end
@@ -105,9 +106,27 @@ def hold?
         !!@held_path
       end
 
-      # Set @hold to true.
+      # Hold after each iteration.
+      # @param held_path [String] File name to store hold file.
       def hold!(held_path)
         @held_path = held_path
+        @run_single = true
+      end
+
+      # Save interim results. Similar to hold!, but all reports are run.
+      # The report label must change for each invocation.
+      # One way to achieve this is to include the version in the label.
+      # @param held_path [String] File name to store the saved results.
+      def save!(held_path)
+        @held_path = held_path
+        @run_single = false
+      end
+
+      # Return true if items are to be run one at a time.
+      # For the traditional hold, this is true
+      # @return [Boolean] Run just a single item?
+      def run_single?
+        @run_single
       end
 
       # Return true if job needs to generate json.
@@ -116,7 +135,7 @@ def json?
         !!@json_path
       end
 
-      # Set @json_path to given path, defaults to "data.json".
+      # Generate json to given path, defaults to "data.json".
       def json!(path="data.json")
         @json_path = path
       end
@@ -167,16 +186,38 @@ def iterations_per_sec cycles, time_us
         MICROSECONDS_PER_SECOND * (cycles.to_f / time_us.to_f)
       end
 
-      def held_results?
-        File.exist?(@held_path)
+      def load_held_results
+        return unless @held_path && File.exist?(@held_path)
+        require "json"
+        @held_results = {}
+        JSON.load(IO.read(@held_path)).each do |result|
+          @held_results[result['item']] = result
+          create_report(result['item'], result['measured_us'], result['iter'],
+                        create_stats(result['samples']), result['cycles'])
+        end
       end
 
-      def load_held_results
+      def save_held_results
+        return unless @held_path
         require "json"
-        @held_results = Hash[File.open(@held_path).map { |line|
-          result = JSON.parse(line)
-          [result['item'], result]
-        }]
+        data = full_report.entries.map { |e|
+          {
+            'item' => e.label,
+            'measured_us' => e.microseconds,
+            'iter' => e.iterations,
+            'samples' => e.samples,
+            'cycles' => e.measurement_cycle
+          }
+        }
+        IO.write(@held_path, JSON.generate(data) << "\n")
+      end
+
+      def all_results_have_been_run?
+        @full_report.entries.size == @list.size
+      end
+
+      def clear_held_results
+        File.delete @held_path if File.exist?(@held_path)
       end
 
       def run
@@ -189,24 +230,17 @@ def run
 
         @stdout.start_running if @stdout
 
-        held = nil
-
         @iterations.times do |n|
-          held = run_benchmark
+          run_benchmark
         end
 
         @stdout.footer if @stdout
-
-        if held
-          puts
-          puts 'Pausing here -- run Ruby again to measure the next benchmark...'
-        end
       end
 
       # Run warmup.
       def run_warmup
         @list.each do |item|
-          next if hold? && @held_results && @held_results.key?(item.label)
+          next if run_single? && @held_results && @held_results.key?(item.label)
 
           @suite.warming item.label, @warmup if @suite
           @stdout.warming item.label, @warmup if @stdout
@@ -232,19 +266,14 @@ def run_warmup
           @stdout.warmup_stats warmup_time_us, @timing[item] if @stdout
           @suite.warmup_stats warmup_time_us, @timing[item] if @suite
 
-          break if hold?
+          break if run_single?
         end
       end
 
       # Run calculation.
       def run_benchmark
         @list.each do |item|
-          if hold? && @held_results && @held_results.key?(item.label)
-           result = @held_results[item.label]
-            create_report(item.label, result['measured_us'], result['iter'],
-                          create_stats(result['samples']), result['cycles'])
-            next
-          end
+          next if run_single? && @held_results && @held_results.key?(item.label)
 
           @suite.running item.label, @time if @suite
           @stdout.running item.label, @time if @stdout
@@ -291,28 +320,8 @@ def run_benchmark
           @stdout.add_report rep, caller(1).first if @stdout
           @suite.add_report rep, caller(1).first if @suite
 
-          if hold? && item != @list.last
-            File.open @held_path, "a" do |f|
-              require "json"
-              f.write JSON.generate({
-                :item => item.label,
-                :measured_us => measured_us,
-                :iter => iter,
-                :samples => samples,
-                :cycles => cycles
-              })
-              f.write "\n"
-            end
-
-            return true
-          end
+          break if run_single?
         end
-
-        if hold? && @full_report.entries.size == @list.size
-          File.delete @held_path if File.exist?(@held_path)
-        end
-
-        false
       end
 
       def create_stats(samples)

diff --git a/lib/benchmark/ips/report.rb b/lib/benchmark/ips/report.rb
@@ -52,6 +52,10 @@ def ips_sd
           @stats.error
         end
 
+        def samples
+          @stats.samples
+        end
+
         # Number of Cycles.
         # @return [Integer] number of cycles.
         attr_reader :measurement_cycle
@@ -72,7 +76,7 @@ def seconds
         # Return entry's standard deviation of iteration per second in percentage.
         # @return [Float] +@ips_sd+ in percentage.
         def error_percentage
-          100.0 * (@stats.error.to_f / @stats.central_tendency)
+          @stats.error_percentage
         end
 
         alias_method :runtime, :seconds
@@ -84,7 +88,7 @@ def error_percentage
         def body
           case Benchmark::IPS.options[:format]
           when :human
-            left = "%s (±%4.1f%%) i/s" % [Helpers.scale(@stats.central_tendency), error_percentage]
+            left = "%s (±%4.1f%%) i/s" % [Helpers.scale(@stats.central_tendency), @stats.error_percentage]
             iters = Helpers.scale(@iterations)
 
             if @show_total_time
@@ -93,7 +97,7 @@ def body
               left.ljust(20) + (" - %s" % iters)
             end
           else
-            left = "%10.1f (±%.1f%%) i/s" % [@stats.central_tendency, error_percentage]
+            left = "%10.1f (±%.1f%%) i/s" % [@stats.central_tendency, @stats.error_percentage]
 
             if @show_total_time
               left.ljust(20) + (" - %10d in %10.6fs" % [@iterations, runtime])

diff --git a/lib/benchmark/ips/stats/bootstrap.rb b/lib/benchmark/ips/stats/bootstrap.rb
@@ -3,27 +3,30 @@ module IPS
     module Stats
 
       class Bootstrap
-
-        attr_reader :data
+        include StatsMetric
+        attr_reader :data, :error, :samples
 
         def initialize(samples, confidence)
           dependencies
           @iterations = 10_000
           @confidence = (confidence / 100.0).to_s
+          @samples = samples
           @data = Kalibera::Data.new({[0] => samples}, [1, samples.size])
           interval = @data.bootstrap_confidence_interval(@iterations, @confidence)
           @median = interval.median
           @error = interval.error
         end
 
+        # Central stat value (the median of the bootstrap confidence interval)
+        # @return [Float] central_tendency
         def central_tendency
           @median
         end
 
-        def error
-          @error
-        end
-
+        # Determines how much slower this stat is than the baseline stat.
+        # If this average is lower than the faster baseline, a higher average is better (e.g. ips); the slowdown is calculated accordingly.
+        # @param baseline [SD|Bootstrap] faster baseline
+        # @return [Array<Float, nil>] the slowdown and the error (not calculated for standard deviation)
         def slowdown(baseline)
           low, slowdown, high = baseline.data.bootstrap_quotient(@data, @iterations, @confidence)
           error = Timing.mean([slowdown - low, high - slowdown])