Merge 3c6c7eb into f817d97

bencheeorg · Mar 27, 2019 · a56684b · a56684b
2 parents f817d97 + 3c6c7eb
commit a56684b
Show file tree

Hide file tree

Showing 10 changed files with 314 additions and 234 deletions.
diff --git a/README.md b/README.md
@@ -718,6 +718,7 @@ map_fun = fn i -> [i, i * i] end
 )
 |> Benchee.collect()
 |> Benchee.statistics()
+|> Benchee.relative_statistics()
 |> Benchee.Formatter.output(Benchee.Formatters.Console)
 # Instead of the last call you could also just use Benchee.Formatter.output()
 # to just output all configured formatters
@@ -729,8 +730,9 @@ This is a take on the _functional transformation_ of data applied to benchmarks:
 2. Gather System data
 3. Define the functions to be benchmarked
 4. Run benchmarks with the given configuration gathering raw run times per function
-5. Generate statistics based on the raw run times
-6. Format the statistics in a suitable way and print them out
+5. Calculate statistics based on the raw run times
+6. Calculate statistics between the scenarios (faster/slower...)
+7. Format the statistics in a suitable way and print them out
 
 This is also part of the **official API** and allows for more **fine grained control**. (It's also what benchee does internally when you use `Benchee.run/2`).
 

diff --git a/lib/benchee.ex b/lib/benchee.ex
@@ -61,6 +61,7 @@ for {module, moduledoc} <- [{Benchee, elixir_doc}, {:benchee, erlang_doc}] do
       |> Benchee.collect()
       |> Benchee.statistics()
       |> Benchee.load()
+      |> Benchee.relative_statistics()
       |> Formatter.output()
     end
 
@@ -86,6 +87,7 @@ for {module, moduledoc} <- [{Benchee, elixir_doc}, {:benchee, erlang_doc}] do
     defdelegate collect(suite), to: Benchee.Benchmark
     defdelegate collect(suite, printer), to: Benchee.Benchmark
     defdelegate statistics(suite), to: Benchee.Statistics
+    defdelegate relative_statistics(suite), to: Benchee.RelativeStatistics
     defdelegate load(suite), to: Benchee.ScenarioLoader
   end
 end
diff --git a/lib/benchee/formatters/console.ex b/lib/benchee/formatters/console.ex
@@ -8,7 +8,7 @@ defmodule Benchee.Formatters.Console do
       flat_map           2.40 K      417.00 μs     ±9.40%      411.45 μs      715.21 μs
       map.flatten        1.24 K      806.89 μs    ±16.62%      768.02 μs     1170.67 μs
 
-      Comparison: 
+      Comparison:
       flat_map           2.40 K
       map.flatten        1.24 K - 1.93x slower
 
@@ -26,8 +26,6 @@ defmodule Benchee.Formatters.Console do
   alias Benchee.Suite
   alias Benchee.Formatters.Console.{Memory, RunTime}
 
-  def format(suite), do: format(suite, %{})
-
   @doc """
   Formats the benchmark statistics to a report suitable for output on the CLI.
 
@@ -80,7 +78,7 @@ defmodule Benchee.Formatters.Console do
   """
   @impl true
   @spec format(Suite.t(), map) :: [any]
-  def format(%Suite{scenarios: scenarios, configuration: config}, options) do
+  def format(%Suite{scenarios: scenarios, configuration: config}, options \\ %{}) do
     if Map.has_key?(options, :unit_scaling), do: warn_unit_scaling()
 
     config =
@@ -109,14 +107,12 @@ defmodule Benchee.Formatters.Console do
     end
   end
 
-  def write(suite), do: write(suite, %{})
-
   @doc """
   Takes the output of `format/1` and writes that to the console.
   """
   @impl true
   @spec write(any, map) :: :ok | {:error, String.t()}
-  def write(output, _options) do
+  def write(output, _options \\ %{}) do
     IO.write(output)
   rescue
     _ -> {:error, "Unknown Error"}

diff --git a/lib/benchee/relative_statistics.ex b/lib/benchee/relative_statistics.ex
@@ -0,0 +1,98 @@
+defmodule Benchee.RelativeStatistics do
+  @moduledoc """
+  Statistics that are relative from one scenario to another.
+
+  Such as how much slower/faster something is or what the absolute difference is in the measured
+  values.
+  This is its own step because it has to run after scenarios have been loaded via
+  `Benchee.ScenarioLoader` to include them in the calculation, while `Benchee.Statistics`
+  has to happen before they are loaded to avoid recalculating their statistics.
+  """
+
+  alias Benchee.{Scenario, Statistics, Suite}
+
+  @doc """
+  Calculates the statistics of scenarios relative to each other and sorts the scenarios.
+
+  Such as `relative_more`, `relative_less` and `absolute_difference`,
+  see `t:Benchee.Statistics.t/0` for more.
+
+  The sorting of scenarios is important so that they always have the same order in
+  all formatters. Scenarios are sorted first by run time average, then by memory average.
+  """
+  @spec relative_statistics(Suite.t()) :: Suite.t()
+  def relative_statistics(suite) do
+    scenarios =
+      suite.scenarios
+      |> sort()
+      |> calculate_relative_statistics(suite.configuration.inputs)
+
+    %Suite{suite | scenarios: scenarios}
+  end
+
+  defp calculate_relative_statistics([], _inputs), do: []
+
+  defp calculate_relative_statistics(scenarios, inputs) do
+    scenarios
+    |> scenarios_by_input(inputs)
+    |> Enum.flat_map(fn scenarios_with_same_input ->
+      {reference, others} = split_reference_scenario(scenarios_with_same_input)
+      others_with_relative = statistics_relative_to(others, reference)
+      [reference | others_with_relative]
+    end)
+  end
+
+  @spec sort([Scenario.t()]) :: [Scenario.t()]
+  defp sort(scenarios) do
+    Enum.sort_by(scenarios, fn scenario ->
+      {scenario.run_time_data.statistics.average, scenario.memory_usage_data.statistics.average}
+    end)
+  end
+
+  defp scenarios_by_input(scenarios, nil), do: [scenarios]
+
+  # we can't just group_by `input_name` because that'd lose the order of inputs which might
+  # be important
+  defp scenarios_by_input(scenarios, inputs) do
+    Enum.map(inputs, fn {input_name, _} ->
+      Enum.filter(scenarios, fn scenario -> scenario.input_name == input_name end)
+    end)
+  end
+
+  # right now we take the first scenario as we sorted them and it is the fastest,
+  # whenever we implement #179 though this becomes more involved
+  defp split_reference_scenario(scenarios) do
+    [reference | others] = scenarios
+    {reference, others}
+  end
+
+  defp statistics_relative_to(scenarios, reference) do
+    Enum.map(scenarios, fn scenario ->
+      scenario
+      |> update_in([Access.key!(:run_time_data), Access.key!(:statistics)], fn statistics ->
+        add_relative_statistics(statistics, reference.run_time_data.statistics)
+      end)
+      |> update_in([Access.key!(:memory_usage_data), Access.key!(:statistics)], fn statistics ->
+        add_relative_statistics(statistics, reference.memory_usage_data.statistics)
+      end)
+    end)
+  end
+
+  # we might not run time/memory --> we shouldn't crash then ;)
+  defp add_relative_statistics(statistics = %{average: nil}, _reference), do: statistics
+  defp add_relative_statistics(statistics, %{average: nil}), do: statistics
+
+  defp add_relative_statistics(statistics, reference_statistics) do
+    %Statistics{
+      statistics
+      | relative_more: zero_safe_division(statistics.average, reference_statistics.average),
+        relative_less: zero_safe_division(reference_statistics.average, statistics.average),
+        absolute_difference: statistics.average - reference_statistics.average
+    }
+  end
+
+  defp zero_safe_division(0.0, 0.0), do: 1.0
+  defp zero_safe_division(_, 0), do: :infinity
+  defp zero_safe_division(_, 0.0), do: :infinity
+  defp zero_safe_division(a, b), do: a / b
+end
diff --git a/lib/benchee/statistics.ex b/lib/benchee/statistics.ex
@@ -6,7 +6,7 @@ defmodule Benchee.Statistics do
   See `statistics/1` for a breakdown of the included statistics.
   """
 
-  alias Benchee.{Conversion.Duration, Scenario, Suite, Utility.Parallel}
+  alias Benchee.{Conversion.Duration, Suite, Utility.Parallel}
 
   alias Benchee.Statistics.Mode
   alias Benchee.Statistics.Percentile
@@ -166,11 +166,7 @@ defmodule Benchee.Statistics do
   def statistics(suite = %Suite{scenarios: scenarios}) do
     config = suite.configuration
 
-    scenarios_with_statistics =
-      scenarios
-      |> calculate_per_scenario_statistics(config)
-      |> sort()
-      |> calculate_relative_statistics(config.inputs)
+    scenarios_with_statistics = calculate_per_scenario_statistics(scenarios, config)
 
     %Suite{suite | scenarios: scenarios_with_statistics}
   end
@@ -245,147 +241,4 @@ defmodule Benchee.Statistics do
         std_dev_ips: standard_dev_ips
     }
   end
-
-  defp calculate_relative_statistics([], _inputs), do: []
-
-  defp calculate_relative_statistics(scenarios, inputs) do
-    scenarios
-    |> scenarios_by_input(inputs)
-    |> Enum.flat_map(fn scenarios_with_same_input ->
-      {reference, others} = split_reference_scenario(scenarios_with_same_input)
-      others_with_relative = statistics_relative_to(others, reference)
-      [reference | others_with_relative]
-    end)
-  end
-
-  defp scenarios_by_input(scenarios, nil), do: [scenarios]
-
-  # we can't just group_by `input_name` because that'd lose the order of inputs which might
-  # be important
-  defp scenarios_by_input(scenarios, inputs) do
-    Enum.map(inputs, fn {input_name, _} ->
-      Enum.filter(scenarios, fn scenario -> scenario.input_name == input_name end)
-    end)
-  end
-
-  # right now we take the first scenario as we sorted them and it is the fastest,
-  # whenever we implement #179 though this becomesd more involved
-  defp split_reference_scenario(scenarios) do
-    [reference | others] = scenarios
-    {reference, others}
-  end
-
-  defp statistics_relative_to(scenarios, reference) do
-    Enum.map(scenarios, fn scenario ->
-      scenario
-      |> update_in([Access.key!(:run_time_data), Access.key!(:statistics)], fn statistics ->
-        add_relative_statistics(statistics, reference.run_time_data.statistics)
-      end)
-      |> update_in([Access.key!(:memory_usage_data), Access.key!(:statistics)], fn statistics ->
-        add_relative_statistics(statistics, reference.memory_usage_data.statistics)
-      end)
-    end)
-  end
-
-  # we might not run time/memory --> we shouldn't crash then ;)
-  defp add_relative_statistics(statistics = %{average: nil}, _reference), do: statistics
-  defp add_relative_statistics(statistics, %{average: nil}), do: statistics
-
-  defp add_relative_statistics(statistics, reference_statistics) do
-    %__MODULE__{
-      statistics
-      | relative_more: zero_safe_division(statistics.average, reference_statistics.average),
-        relative_less: zero_safe_division(reference_statistics.average, statistics.average),
-        absolute_difference: statistics.average - reference_statistics.average
-    }
-  end
-
-  defp zero_safe_division(0.0, 0.0), do: 1.0
-  defp zero_safe_division(_, 0), do: :infinity
-  defp zero_safe_division(_, 0.0), do: :infinity
-  defp zero_safe_division(a, b), do: a / b
-
-  @doc """
-  Calculate additional percentiles and add them to the
-  `run_time_data.statistics`. Should only be used after `statistics/1`, to
-  calculate extra values that may be needed for reporting.
-
-  ## Examples
-
-      iex> scenarios = [
-      ...>   %Benchee.Scenario{
-      ...>     job_name: "My Job",
-      ...>     run_time_data: %Benchee.CollectionData{
-      ...>       samples: [200, 400, 400, 400, 500, 500, 500, 700, 900]
-      ...>     },
-      ...>     memory_usage_data: %Benchee.CollectionData{
-      ...>       samples: [200, 400, 400, 400, 500, 500, 500, 700, 900]
-      ...>     },
-      ...>     input_name: "Input",
-      ...>     input: "Input"
-      ...>   }
-      ...> ]
-      iex> %Benchee.Suite{scenarios: scenarios}
-      ...> |> Benchee.Statistics.statistics
-      ...> |> Benchee.Statistics.add_percentiles([25, 75])
-      %Benchee.Suite{
-        scenarios: [
-          %Benchee.Scenario{
-            job_name: "My Job",
-            input_name: "Input",
-            input: "Input",
-            run_time_data: %Benchee.CollectionData{
-              samples: [200, 400, 400, 400, 500, 500, 500, 700, 900],
-              statistics: %Benchee.Statistics{
-                average:       500.0,
-                ips:           2_000_000.0,
-                std_dev:       200.0,
-                std_dev_ratio: 0.4,
-                std_dev_ips:   800_000.0,
-                median:        500.0,
-                percentiles:   %{25 => 400.0, 50 => 500.0, 75 => 600.0, 99 => 900.0},
-                mode:          [500, 400],
-                minimum:       200,
-                maximum:       900,
-                sample_size:   9
-              }
-            },
-            memory_usage_data: %Benchee.CollectionData{
-              samples: [200, 400, 400, 400, 500, 500, 500, 700, 900],
-              statistics: %Benchee.Statistics{
-                average:       500.0,
-                ips:           nil,
-                std_dev:       200.0,
-                std_dev_ratio: 0.4,
-                std_dev_ips:   nil,
-                median:        500.0,
-                percentiles:   %{50 => 500.0, 99 => 900.0},
-                mode:          [500, 400],
-                minimum:       200,
-                maximum:       900,
-                sample_size:   9
-              }
-            }
-          }
-        ]
-      }
-  """
-  def add_percentiles(suite = %Suite{scenarios: scenarios}, percentile_ranks) do
-    new_scenarios =
-      Parallel.map(scenarios, fn scenario ->
-        update_in(scenario.run_time_data.statistics.percentiles, fn existing ->
-          new = Percentile.percentiles(scenario.run_time_data.samples, percentile_ranks)
-          Map.merge(existing, new)
-        end)
-      end)
-
-    %Suite{suite | scenarios: new_scenarios}
-  end
-
-  @spec sort([Scenario.t()]) :: [Scenario.t()]
-  defp sort(scenarios) do
-    Enum.sort_by(scenarios, fn scenario ->
-      {scenario.run_time_data.statistics.average, scenario.memory_usage_data.statistics.average}
-    end)
-  end
 end
diff --git a/samples/run_verbose.exs b/samples/run_verbose.exs
@@ -9,8 +9,9 @@ map_fun = fn i -> [i, i * i] end
   "map.flatten",
   fn -> list |> Enum.map(map_fun) |> List.flatten() end
 )
-|> Benchee.measure()
+|> Benchee.collect()
 |> Benchee.statistics()
+|> Benchee.relative_statistics()
 |> Benchee.Formatter.output(Benchee.Formatters.Console)
 
 # Operating System: Linux