Skip to content

Commit

Permalink
Send minimum of data to async statistics calculation
Browse files Browse the repository at this point in the history
Done to avoid copying potentially huge inputs and functions to
the processes when all they need to do is crunch some silly small
numbers. HUGE performance gain while reducing memory usage a lot
for benchmarks with bigger data in use.

Also use Task.async_stream instead of our good old `Parallel.map`,
as with the dawn of `inputs` I have seen people do 30+ scenarios
(10 benchmarks with 3 inputs will do that to you). If we're
running on a 4 core system we might be doing too much in parallel,
also potentially skyrocketing memory consumption.
  • Loading branch information
PragTob committed Dec 13, 2023
1 parent 63a5c5e commit beb8210
Showing 1 changed file with 56 additions and 27 deletions.
83 changes: 56 additions & 27 deletions lib/benchee/statistics.ex
Expand Up @@ -6,7 +6,7 @@ defmodule Benchee.Statistics do
See `statistics/1` for a breakdown of the included statistics.
"""

alias Benchee.{CollectionData, Conversion.Duration, Scenario, Suite, Utility.Parallel}
alias Benchee.{CollectionData, Conversion.Duration, Scenario, Suite}
alias Benchee.Output.ProgressPrinter

require Integer
Expand Down Expand Up @@ -168,39 +168,68 @@ defmodule Benchee.Statistics do
percentiles = suite.configuration.percentiles

update_in(suite.scenarios, fn scenarios ->
Parallel.map(scenarios, fn scenario ->
# send only the samples, percentiles and whatevs along
# be mindful that we might (soon (tm)) pass on the sample size from here as well
# theoretically we could only pass on collection_data
calculate_scenario_statistics(scenario, percentiles)
end)
scenario_statistics = compute_statistics_in_parallel(scenarios, percentiles)

update_scenarios_with_statistics(scenarios, scenario_statistics)
end)
end

# Computes the statistics for every scenario in separate tasks and returns one
# result per scenario, in the same order as `scenarios`.
defp compute_statistics_in_parallel(scenarios, percentiles) do
  # Only the three collection data structs are handed to the tasks, so neither the
  # input nor the benchmarking function has to be sent to the other processes.
  # The whole collection data (not just the samples) is passed along so that a
  # sample size could be provided from here in the future without touching this code.
  collection_data_per_scenario =
    for scenario <- scenarios do
      {scenario.run_time_data, scenario.memory_usage_data, scenario.reductions_data}
    end

  # async_stream is used because a benchmark may contain a large number of scenarios.
  results =
    Task.async_stream(
      collection_data_per_scenario,
      &calculate_scenario_statistics(&1, percentiles),
      timeout: :infinity,
      ordered: true
    )

  # Assertive match: anything other than `{:ok, stats}` raises here.
  Enum.map(results, fn {:ok, stats} -> stats end)
end

defp calculate_scenario_statistics(scenario, percentiles) do
# Writes each `{run_time_stats, memory_stats, reductions_stats}` tuple into the
# `statistics` field of the matching scenario's collection data structs.
defp update_scenarios_with_statistics(scenarios, scenario_statistics) do
  # Pairing by position works because the statistics retained the scenario order.
  scenarios
  |> Enum.zip(scenario_statistics)
  |> Enum.map(fn {scenario, {run_time_stats, memory_stats, reductions_stats}} ->
    run_time_data = %CollectionData{scenario.run_time_data | statistics: run_time_stats}
    memory_usage_data = %CollectionData{scenario.memory_usage_data | statistics: memory_stats}
    reductions_data = %CollectionData{scenario.reductions_data | statistics: reductions_stats}

    %Scenario{
      scenario
      | run_time_data: run_time_data,
        memory_usage_data: memory_usage_data,
        reductions_data: reductions_data
    }
  end)
end

defp calculate_scenario_statistics({run_time_data, memory_data, reductions_data}, percentiles) do
run_time_stats =
scenario.run_time_data.samples
run_time_data.samples
|> calculate_statistics(percentiles)
|> add_ips

memory_stats = calculate_statistics(scenario.memory_usage_data.samples, percentiles)
reductions_stats = calculate_statistics(scenario.reductions_data.samples, percentiles)

%Scenario{
scenario
| run_time_data: %CollectionData{
scenario.run_time_data
| statistics: run_time_stats
},
memory_usage_data: %CollectionData{
scenario.memory_usage_data
| statistics: memory_stats
},
reductions_data: %CollectionData{
scenario.reductions_data
| statistics: reductions_stats
}
}
memory_stats = calculate_statistics(memory_data.samples, percentiles)
reductions_stats = calculate_statistics(reductions_data.samples, percentiles)

{run_time_stats, memory_stats, reductions_stats}
end

defp calculate_statistics([], _) do
Expand Down

0 comments on commit beb8210

Please sign in to comment.