forked from rigtorp/ipc-bench
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rework things a bit so that the tests themselves dump raw timestamps
and the various summary stats are computed after the fact by a separate program.
- Loading branch information
Steven Smith
committed
Nov 4, 2011
1 parent
f14f79d
commit 22c5980
Showing
4 changed files
with
298 additions
and
252 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,290 @@ | ||
#include <assert.h> | ||
#include <err.h> | ||
#include <math.h> | ||
#include <stdbool.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
|
||
/*
 * Summary statistics for one run of samples, plus a reference to the
 * samples they were computed from.
 *
 * calc_summary_stats() fills this in.  It stores the caller's pointer in
 * @data without copying, so the array must outlive the structure.
 * point_to_percentile() binary-searches @data and print_summary_stats()
 * reads order statistics from it by index, so both expect @data to be
 * sorted in ascending order.
 */
struct summary_stats {
    const double *data;     /* borrowed sample array (not owned) */
    int nr_items;           /* number of entries in @data */

    double mean;            /* arithmetic mean of the samples */
    /* The three fields below are computed with a 1/nr_items normaliser
       (i.e. sd = sqrt(m2 / nr_items)), not the n-1 form. */
    double sample_sd;       /* standard deviation */
    double sample_skew;     /* skewness */
    double sample_kurtosis; /* excess kurtosis (3 already subtracted) */
};
|
||
/*
 * Read whitespace-separated floating point samples from stdin until end
 * of input.
 *
 * On return *_res points at a heap array holding the samples in input
 * order and *nr_samples is the number of samples read.  The caller owns
 * the array and releases it with free().
 *
 * The process exits with status 1 if stdin cannot be read, if it contains
 * something that does not parse as a number, if no samples are present at
 * all, or if memory runs out.
 */
static void
read_input(double **_res, int *nr_samples)
{
    int n = 0;
    int n_alloced = 0;
    double *res = NULL;
    double sample;
    int r;

    for (;;) {
        /* %le skips leading whitespace (newlines included), so no
           explicit separator is needed in the format. */
        r = scanf("%le", &sample);
        if (r == EOF) {
            /* EOF covers both a genuine read error and plain end of
               input; only the former has a meaningful errno. */
            if (ferror(stdin))
                err(1, "scanf");
            break;
        }
        if (r != 1)
            errx(1, "scanf returned unexpected value %d", r);

        if (n == n_alloced) {
            double *grown;

            /* Grow in fixed 1024-entry steps. */
            n_alloced += 1024;
            grown = realloc(res, sizeof(res[0]) * n_alloced);
            if (!grown)
                err(1, "realloc");
            res = grown;
        }
        res[n] = sample;
        n++;
    }

    /* The summary code indexes data[0] unconditionally, so an empty
       sample set is reported here rather than passed on. */
    if (n == 0)
        errx(1, "no samples on stdin");

    *_res = res;
    *nr_samples = n;
}
|
||
/*
 * qsort() comparator ordering doubles ascending.
 *
 * Returns -1, 0 or 1 when *_a is respectively less than, equal to or
 * greater than *_b.  NaN compares unordered with everything, which would
 * give qsort() an inconsistent ordering; to keep the comparator a valid
 * total order, NaNs are treated as equal to each other and greater than
 * every other value, so they end up at the tail of the sorted array.
 */
static int
compare_double(const void *_a, const void *_b)
{
    const double a = *(const double *)_a;
    const double b = *(const double *)_b;
    const int a_nan = isnan(a) != 0;
    const int b_nan = isnan(b) != 0;

    if (a_nan || b_nan)
        return a_nan - b_nan;
    return (a > b) - (a < b);
}
|
||
static void | ||
calc_summary_stats(const double *data, int nr_items, struct summary_stats *out) | ||
{ | ||
/* On-line calculation of mean, variance, skew and kurtosis | ||
lifted straight from wikipedia. */ | ||
double mean = 0; | ||
double m2 = 0; | ||
double m3 = 0; | ||
double m4 = 0; | ||
double delta; | ||
double delta_n; | ||
double variance; | ||
double sd; | ||
double skew; | ||
double kurtosis; | ||
double n; | ||
int i; | ||
|
||
for (i = 0; i < nr_items; i++) { | ||
n = i + 1; | ||
delta = data[i] - mean; | ||
delta_n = delta / n; | ||
mean = (mean * i) / n + data[i]/n; | ||
m4 = m4 + delta_n * delta_n * delta_n * delta * (n - 1) * (n * n - 3 * n + 3) + 6 * delta_n * delta_n * m2 - 4 * delta_n * m3; | ||
m3 = m3 + delta_n * delta_n * delta * (n - 1) * (n - 2) - 3 * delta_n * m2; | ||
m2 = m2 + delta_n * delta * (n - 1); | ||
} | ||
|
||
variance = m2 / nr_items; | ||
sd = sqrt(variance); | ||
skew = m3/(nr_items * sd * sd * sd); | ||
kurtosis = nr_items * m4 / (m2*m2) - 3; | ||
|
||
out->mean = mean; | ||
out->sample_sd = sd; | ||
out->sample_skew = skew; | ||
out->sample_kurtosis = kurtosis; | ||
|
||
out->data = data; | ||
out->nr_items = nr_items; | ||
} | ||
|
||
static double | ||
point_to_percentile(const struct summary_stats *ss, double point) | ||
{ | ||
double y1, y2, num, denum; | ||
int low, high; | ||
int probe; | ||
|
||
if (point < ss->data[0]) | ||
return 0; | ||
else if (point > ss->data[ss->nr_items-1]) | ||
return 100; | ||
low = 0; | ||
high = ss->nr_items; | ||
while (low + 1 < high) { | ||
/* Invariant: everything in slots before @low is less than @point, | ||
everything in slots at or after @high is greater than | ||
@point. */ | ||
probe = (high + low) / 2; | ||
assert(probe != low); | ||
if (point > ss->data[probe]) { | ||
low = probe + 1; | ||
} else if (point < ss->data[probe]) { | ||
high = probe; | ||
} else { | ||
/* The probe is now in the range of data which is equal to | ||
point. */ | ||
goto probe_is_point; | ||
} | ||
} | ||
if (high == low + 1) { | ||
if (point < ss->data[low]) { | ||
assert(low != 0); | ||
assert(point > ss->data[low-1]); | ||
low--; | ||
high--; | ||
} | ||
if (ss->data[low] == point) { | ||
probe = low; | ||
goto probe_is_point; | ||
} else if (ss->data[high] == point) { | ||
probe = high; | ||
goto probe_is_point; | ||
} else { | ||
goto linear_interpolate; | ||
} | ||
} else { | ||
assert(high == low); | ||
if (low == 0) { | ||
return 0; | ||
} else { | ||
low = high - 1; | ||
goto linear_interpolate; | ||
} | ||
} | ||
|
||
probe_is_point: | ||
low = probe; | ||
while (low >= 0 && ss->data[low] == point) | ||
low--; | ||
high = probe; | ||
while (high < ss->nr_items && ss->data[high] == point) | ||
high++; | ||
return (high + low) * 50.0 / ss->nr_items; | ||
|
||
linear_interpolate: | ||
y1 = ss->data[low]; | ||
y2 = ss->data[high]; | ||
num = (point + y2 * low - high * y1) * 100.0 / ss->nr_items; | ||
denum = y2 - y1; | ||
if (fabs(denum / num) < 0.01) { | ||
/* The two points we're trying to interpolate between are so close | ||
together that we risk numerical error, so we can't use the | ||
normal formula. Fortunately, if they're that close together | ||
then it doesn't really matter, and we can use a simple | ||
average. */ | ||
return (low + high) * 50.0 / ss->nr_items; | ||
} else { | ||
return num / denum; | ||
} | ||
} | ||
|
||
static void | ||
print_summary_stats(const struct summary_stats *ss) | ||
{ | ||
double sd_percentiles[7]; | ||
int i; | ||
|
||
printf("\tMean %e, sample sd %e, sample skew %e, sample kurtosis %e\n", | ||
ss->mean, ss->sample_sd, ss->sample_skew, ss->sample_kurtosis); | ||
printf("\tQuintiles: %e, %e, %e, %e, %e, %e\n", | ||
ss->data[0], | ||
ss->data[ss->nr_items / 5], | ||
ss->data[ss->nr_items * 2 / 5], | ||
ss->data[ss->nr_items * 3 / 5], | ||
ss->data[ss->nr_items * 4 / 5], | ||
ss->data[ss->nr_items - 1]); | ||
printf("\t5%% %e, median %e, 95%% %e\n", | ||
ss->data[ss->nr_items / 20], | ||
ss->data[ss->nr_items / 2], | ||
ss->data[ss->nr_items * 19 / 20]); | ||
|
||
/* Also look at how deltas from the mean, in multiples of the SD, | ||
map onto percentiles, to get more hints about non-normality. */ | ||
for (i = 0; i < 7; i++) { | ||
double point = ss->mean + ss->sample_sd * (i - 3); | ||
sd_percentiles[i] = point_to_percentile(ss, point); | ||
} | ||
printf("\tSD percentiles: -3 -> %f%%, -2 -> %f%%, -1 -> %f%%, 0 -> %f%%, 1 -> %f%%, 2 -> %f%%, 3 -> %f%%\n", | ||
sd_percentiles[0], | ||
sd_percentiles[1], | ||
sd_percentiles[2], | ||
sd_percentiles[3], | ||
sd_percentiles[4], | ||
sd_percentiles[5], | ||
sd_percentiles[6]); | ||
} | ||
|
||
static void | ||
summarise_samples(double *data, int nr_samples) | ||
{ | ||
struct summary_stats whole_dist_stats; | ||
struct summary_stats low_outliers; | ||
struct summary_stats high_outliers; | ||
struct summary_stats excl_outliers; | ||
int i; | ||
int low_thresh, high_thresh; | ||
int discard; | ||
|
||
/* Discard the first few samples, so as to avoid startup | ||
transients. */ | ||
discard = nr_samples / 20; | ||
data += discard; | ||
nr_samples -= discard; | ||
|
||
if (nr_samples >= 30) { | ||
printf("By tenths of total run:\n"); | ||
for (i = 0; i < 10; i++) { | ||
struct summary_stats stats; | ||
int start = (nr_samples * i) / 10; | ||
int end = (nr_samples * (i+1)) / 10; | ||
qsort(data + start, end - start, sizeof(data[0]), compare_double); | ||
calc_summary_stats(data + start, end - start, &stats); | ||
printf("Slice %d/10:\n", i); | ||
print_summary_stats(&stats); | ||
} | ||
} | ||
|
||
qsort(data, nr_samples, sizeof(data[0]), compare_double); | ||
|
||
calc_summary_stats(data, nr_samples, &whole_dist_stats); | ||
|
||
printf("Distribution of all values:\n"); | ||
print_summary_stats(&whole_dist_stats); | ||
|
||
#define OUTLIER 10 | ||
low_thresh = nr_samples / OUTLIER; | ||
high_thresh = nr_samples - nr_samples / OUTLIER; | ||
#undef OUTLIER | ||
if (low_thresh >= high_thresh || | ||
low_thresh == 0 || | ||
high_thresh == nr_samples) | ||
return; | ||
calc_summary_stats(data, low_thresh, &low_outliers); | ||
calc_summary_stats(data + low_thresh, high_thresh - low_thresh, &excl_outliers); | ||
calc_summary_stats(data + high_thresh, nr_samples - high_thresh, &high_outliers); | ||
|
||
printf("Low outliers:\n"); | ||
print_summary_stats(&low_outliers); | ||
|
||
printf("Bulk distribution:\n"); | ||
print_summary_stats(&excl_outliers); | ||
|
||
printf("High outliers:\n"); | ||
print_summary_stats(&high_outliers); | ||
} | ||
|
||
/*
 * Entry point: read the samples from stdin, print the summary report to
 * stdout, and release the sample array.
 */
int
main(void)
{
    double *times;
    int nr_samples;

    read_input(&times, &nr_samples);

    summarise_samples(times, nr_samples);

    free(times);
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.