Skip to content

Commit

Permalink
Merge pull request snabbco#59 from lukego/nix-benchmarks
Browse files Browse the repository at this point in the history
Import revamped nix benchmark framework
  • Loading branch information
lukego committed Jul 9, 2017
2 parents 67864a9 + 66fab90 commit f8bd4d8
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 9 deletions.
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,40 @@ $ make

... but make sure you have at least `make`, `clang`, and `luajit` in your `$PATH`.

### Run the benchmarks

Nix can also run the full benchmark suite and generate visualizations
with R/ggplot2.

The simplest incantation tests one branch:

```shell
$ nix-build testsuite/bench --arg Asrc ./. # note: ./. is the current directory
```

You can also test several branches (A-E), give them names, specify
command-line arguments, say how many tests to run, and allow parallel
execution:

```shell
# Run the benchmarks and create result visualizations in result/
$ nix-build testsuite/bench \
--arg Asrc ~/git/raptorjit \
--argstr Aname master \
--arg Bsrc ~/git/raptorjit-hack \
--argstr Bname hacked \
--arg Csrc ~/git/raptorjit-hack2 \
--argstr Cname hacked-O1 \
--argstr Cargs -O1 \
--arg runs 100 \
-j 5 # Run up to 5 tests in parallel
```

If you are using a distributed nix environment such
as [Hydra](https://nixos.org/hydra/) then the tests can be
automatically parallelized and distributed across a suitable build
farm.

### Quotes

Here are some borrowed words to put this branch into context:
Expand Down
13 changes: 6 additions & 7 deletions testsuite/bench/PARAM_x86_CI.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
array3d 300
array3d 500
binary-trees 16
chameneos 1e7
coroutine-ring 2e7
coroutine-ring 5e7
euler14-bit 2e7
fannkuch 11
fasta 5e6
life
mandelbrot 5000
mandelbrot-bit 5000
md5 20000
nbody 5e6
md5 30000
nbody 8e6
nsieve 12
nsieve-bit 12
nsieve-bit 13
nsieve-bit-fp 12
partialsums 1e7
partialsums 3e7
pidigits-nogmp 5000
ray 9
recursive-ack 10
recursive-fib 40
scimark-fft 50000
scimark-lu 5000
Expand Down
49 changes: 49 additions & 0 deletions testsuite/bench/bench.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# R subroutines for reading and visualizing benchmark results.

suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
})

## Read benchmark results from a CSV file and derive relative performance.
##
## The CSV is expected to have the columns written by
## testsuite/bench/default.nix: letter,version,benchmark,run,instructions,cycles.
## Returns the input rows, grouped by (benchmark, version), with two extra
## columns: 'baseline' (mean cycle count of the "A" version for that
## benchmark) and 'relative' (baseline / cycles, so 1.0 means "as fast as
## the baseline average" and larger is faster).
bench.read <- function(filename) {
  data <- read.csv(filename)
  ## baseline is the mean performance of the "A" version
  baseline <- data %>%
    filter(letter=="A") %>%
    group_by(benchmark) %>%
    summarize(baseline = mean(cycles))
  ## Add 'relative' performance column: compared to mean from baseline branch.
  ## After the join every row of a benchmark shares one baseline value, so
  ## first(baseline) just picks that shared value within each group.
  relative <- data %>%
    left_join(baseline, by="benchmark") %>%
    group_by(benchmark, version) %>%
    mutate(relative = first(baseline) / cycles)
  return(relative)
}

## Jitter plot of relative performance: one facet per benchmark, one
## jittered point per benchmark run, colored by version.
bench.jitterplot <- function(data) {
  base <- ggplot(data=data, aes(x=version, y=relative, color=version))
  base +
    geom_jitter(alpha=0.5, shape=1) +
    scale_y_continuous(labels=scales::percent, breaks=seq(0, 3, 0.1)) +
    theme(aspect.ratio = 1,
          axis.text.x = element_text(angle=90)) +
    ylab("Performance relative to baseline average") +
    ggtitle("Comparative performance between RaptorJIT versions") +
    facet_wrap(~ benchmark, scales="free_x")
}

## ECDF plot faceted by benchmark: the empirical cumulative distribution
## of relative performance for each version, one facet per benchmark.
## x = relative performance, y = cumulative proportion of runs.
bench.ecdfplot <- function(data) {
  ggplot(aes(x=relative, color=version), data=data) +
    stat_ecdf() +
    scale_x_continuous(labels=scales::percent) +
    scale_y_log10(labels=scales::percent) +
    theme(aspect.ratio = 1) +
    theme(axis.text.x = element_text(angle=90)) +
    ## Fix: the axis labels were swapped -- 'relative' is mapped to x,
    ## and the cumulative percentage is what stat_ecdf puts on y.
    ## NOTE(review): stat_ecdf plots P(X <= x), so "at or above" in the
    ## y label may be inverted ("at or below") -- confirm intended wording.
    xlab("Performance relative to baseline average") +
    ylab("Percentage of results at or above this performance level") +
    ggtitle("Comparative performance between RaptorJIT variants") +
    facet_wrap(~ benchmark)
}

103 changes: 103 additions & 0 deletions testsuite/bench/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Run a large parallel benchmark campaign and generate R/ggplot2 reports.
#
# Up to five source trees (A..E) are benchmarked. A is mandatory and
# serves as the baseline for the relative-performance plots; B..E are
# optional. Each (branch, run) pair is its own derivation, so nix can
# parallelize (-j N) or distribute the runs across a build farm.

{ pkgs ? (import ../../pkgs.nix) {},
  # Per branch: source tree, display name for the plots, and extra
  # raptorjit command-line arguments (e.g. "-O1").
  Asrc, Aname ? "A", Aargs ? "",
  Bsrc ? null, Bname ? "B", Bargs ? "",
  Csrc ? null, Cname ? "C", Cargs ? "",
  Dsrc ? null, Dname ? "D", Dargs ? "",
  Esrc ? null, Ename ? "E", Eargs ? "",
  # Optional system-feature tag to pin all runs onto uniform hardware
  # (fed to requiredSystemFeatures below).
  hardware ? null,
  # Number of repetitions of the full benchmark suite per branch.
  runs ? 30 }:

with pkgs;
with stdenv;

# Derivation to run benchmarks and produce a CSV result.
# benchmark :: letter -> name -> src -> args -> run -> derivation
# Runs every benchmark listed in PARAM_x86_CI.txt once (iteration
# number 'run') under 'perf stat' and emits one CSV row per benchmark.
let benchmark = letter: name: src: args: run:
  # Build the raptorjit under test from the branch's own source tree.
  let raptorjit = (import src {inherit pkgs; version = name;}).raptorjit; in
  mkDerivation {
    name = "benchmark-${name}-${toString run}";
    src = pkgs.lib.cleanSource ./.;
    # Force consistent hardware
    requiredSystemFeatures = if hardware != null then [hardware] else [];
    buildInputs = [ raptorjit linuxPackages.perf utillinux ];
    # Each benchmark is killed after 60s; a failed/timed-out run has its
    # .perf file removed so it is simply absent from the CSV.
    buildPhase = ''
      # Run multiple iterations of the benchmarks
      echo "Run $run"
      mkdir -p result/$run
      # Run each individual benchmark
      cat PARAM_x86_CI.txt |
      (while read benchmark params; do
        echo "running $benchmark"
        # Execute with performance monitoring & time supervision
        # Note: discard stdout due to overwhelming output
        timeout -sKILL 60 \
          perf stat -x, -o result/$run/$benchmark.perf \
          raptorjit ${args} -e "math.randomseed(${toString run})" $benchmark.lua $params \
          > /dev/null || \
          rm result/$run/$benchmark.perf
      done)
    '';
    # Parse the perf CSV output (-x,) into rows of
    # letter,version,benchmark,run,instructions,cycles.
    installPhase = ''
      # Copy the raw perf output for reference
      cp -r result $out
      # Log the exact CPU
      lscpu > $out/cpu.txt
      # Create a CSV file
      # Create the rows based on the perf logs
      for result in result/*.perf; do
        version=${name}
        benchmark=$(basename -s.perf -a $result)
        instructions=$(awk -F, -e '$3 == "instructions" { print $1; }' $result)
        cycles=$( awk -F, -e '$3 == "cycles" { print $1; }' $result)
        echo ${letter},$version,$benchmark,${toString run},$instructions,$cycles >> $out/bench.csv
      done
    '';
  };

  # Run a set of benchmarks and aggregate the results into a CSV file.
  # Each benchmark run is a separate derivation. This allows nix to
  # parallelize and distribute the benchmarking.
  benchmarkSet = letter: name: src: args:
    let benchmarks = map (benchmark letter name src args) (pkgs.lib.range 1 runs);
    in
    runCommand "benchmarks-${name}" { buildInputs = benchmarks; } ''
      source $stdenv/setup
      mkdir -p $out
      for dir in ${pkgs.lib.fold (acc: x: "${acc} ${x}") "" benchmarks}; do
        cat $dir/bench.csv >> $out/bench.csv
      done
    '';

  # Unselected branches collapse to "" so the builder below can test
  # for them with [ -n ... ].
  benchA = (benchmarkSet "A" Aname Asrc Aargs);
  benchB = if Bsrc != null then (benchmarkSet "B" Bname Bsrc Bargs) else "";
  benchC = if Csrc != null then (benchmarkSet "C" Cname Csrc Cargs) else "";
  benchD = if Dsrc != null then (benchmarkSet "D" Dname Dsrc Dargs) else "";
  benchE = if Esrc != null then (benchmarkSet "E" Ename Esrc Eargs) else "";
in

rec {
  # Final report: merged bench.csv plus PNG visualizations rendered by
  # generate.R, registered as Hydra build products.
  benchmarkResults = mkDerivation {
    name = "benchmark-results";
    buildInputs = with pkgs.rPackages; [ pkgs.R ggplot2 dplyr ];
    # NOTE(review): the builder script is named "builder.csv" -- presumably
    # meant "builder.sh"; only the store-path name is affected, but it is
    # confusing. Also, under 'set -e' a `[ -n "" ] && cat ...` line returns
    # nonzero when a branch is unset -- confirm the builder shell tolerates
    # this (an `if ...; then ...; fi` form would be unambiguous).
    builder = pkgs.writeText "builder.csv" ''
      source $stdenv/setup
      # Get the CSV file
      mkdir -p $out/nix-support
      echo "letter,version,benchmark,run,instructions,cycles" > bench.csv
      cat ${benchA}/bench.csv >> bench.csv
      [ -n "${benchB}" ] && cat ${benchB}/bench.csv >> bench.csv
      [ -n "${benchC}" ] && cat ${benchC}/bench.csv >> bench.csv
      [ -n "${benchD}" ] && cat ${benchD}/bench.csv >> bench.csv
      [ -n "${benchE}" ] && cat ${benchE}/bench.csv >> bench.csv
      cp bench.csv $out
      echo "file CSV $out/bench.csv" >> $out/nix-support/hydra-build-products
      # Generate the report
      (cd ${./.}; Rscript ./generate.R $out/bench.csv $out)
      for png in $out/*.png; do
        echo "file PNG $png" >> $out/nix-support/hydra-build-products
      done
    '';
  };
}

25 changes: 25 additions & 0 deletions testsuite/bench/generate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env nix-shell
#!nix-shell -i Rscript -p R rpkgs.dplyr rpkgs.ggplot2

# Command-line driver: render benchmark visualizations from a CSV file.
# Usage: generate.R <csv> <outdir>

suppressWarnings(source("bench.R"))

argv <- commandArgs(trailingOnly=TRUE)
if (length(argv) != 2) {
  message("Usage: generate.R <csv> <outdir>"); quit(status=1)
}

csv.file <- argv[[1]]
out.dir <- argv[[2]]

results <- bench.read(csv.file)
if (!dir.exists(out.dir)) { dir.create(out.dir, recursive=TRUE) }

# Write one 12x12 PNG into the output directory.
save.plot <- function(basename, plot) {
  ggsave(filename = file.path(out.dir, basename), plot = plot,
         width=12, height=12)
}

save.plot("bench-jitter.png", bench.jitterplot(results))
save.plot("bench-ecdf.png", bench.ecdfplot(results))
2 changes: 1 addition & 1 deletion testsuite/bench/life.lua
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ function LIFE(w,h)
thisgen:draw()
write("Life - generation ",gen,"\n")
gen=gen+1
if gen>2000 then break end
if gen>10000 then break end
--delay() -- no delay
end
end
Expand Down
2 changes: 1 addition & 1 deletion testsuite/bench/roulette.lua
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
-- (Let the test harness determine the random seed)
-- math.randomseed(os.time())

local population = 100e6
local population = 200e6
local live = 0
local die = 0

Expand Down

0 comments on commit f8bd4d8

Please sign in to comment.