Permalink
Browse files

Merge pull request #1 from basho/adt-fix-eqc

Improve EQC tests and fix bug in histogram code
  • Loading branch information...
2 parents fbb957b + 4ea08bd commit 4ae47ba1033f153751a2efb790d7cb2acb05771f @Vagabond Vagabond committed Feb 7, 2012
Showing with 97 additions and 30 deletions.
  1. +57 −18 src/basho_stats_histogram.erl
  2. +11 −5 src/basho_stats_sample.erl
  3. +29 −7 src/basho_stats_utils.erl
@@ -28,6 +28,10 @@
observations/1,
summary_stats/1]).
+-ifdef(EQC).
+-export([prop_count/0, prop_quantile/0]).
+-endif.
+
-include("stats.hrl").
-record(hist, { n = 0,
@@ -135,10 +139,10 @@ which_bin(Value, Hist) ->
if
Value > Upper ->
erlang:min(Bin + 1, Hist#hist.capacity - 1);
-
Value =< Lower ->
erlang:max(Bin - 1, 0);
-
+ Value == Hist#hist.max ->
+ Hist#hist.capacity-1;
true ->
Bin
end.
@@ -180,49 +184,84 @@ simple_test() ->
-ifdef(EQC).
+
qc_count_check(Min, Max, Bins, Xs) ->
LCounts = counts(update_all(Xs, new(Min, Max, Bins))),
- RCounts = basho_stats_utils:r_run(Xs, ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
+ RCounts = basho_stats_utils:r_run(Xs,
+ ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
[Min, Max, Bins+1])),
- LCounts == RCounts.
+ case LCounts == RCounts of
+ true ->
+ true;
+ _ ->
+ io:format("LCounts ~p, RCounts ~p~n", [LCounts, RCounts]),
+ false
+ end.
+prop_count() ->
+ ?FORALL({Min, Bins, Xlen}, {choose(0, 99), choose(2, 20), choose(2, 100)},
+ ?LET(Max, choose(Min+1, 100),
+ ?LET(Xs, vector(Xlen, choose(Min, Max)),
+ ?WHENFAIL(
+ begin
+ io:format("Min ~p, Max ~p, Bins ~p, Xs ~w~n",
+ [Min, Max, Bins, Xs]),
+ Command = ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
+ [Min, Max, Bins+1]),
+ InputStr = [integer_to_list(I) || I <- Xs],
+ io:format(?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
+ io:format(?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command]))
+ end,
+ qc_count_check(Min, Max, Bins, Xs))))).
qc_count_test() ->
- P = ?LET({Min, Bins, Xlen}, {choose(0, 99), choose(2, 20), choose(2, 100)},
- ?LET(Max, choose(Min+1, 100),
- ?FORALL(Xs, vector(Xlen, choose(Min, Max)),
- qc_count_check(Min, Max, Bins, Xs)))),
- true = eqc:quickcheck(P).
+ true = eqc:quickcheck(prop_count()).
qc_quantile_check(Q, Min, Max, Bins, Xs) ->
- Lq = quantile(Q * 0.01, update_all(Xs, new(Min, Max, Bins))),
- [Rq] = basho_stats_utils:r_run(Xs, ?FMT("quantile(x, ~4.2f)", [Q * 0.01])),
+ Hist = new(Min, Max, Bins),
+ LCounts = counts(update_all(Xs, Hist)),
+ Lq = quantile(Q * 0.01, update_all(Xs, Hist)),
+ [Rq] = basho_stats_utils:r_run(Xs, ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01])),
case abs(Lq - Rq) < 1 of
true ->
true;
false ->
?debugMsg("----\n"),
?debugFmt("Q: ~p Min: ~p Max: ~p Bins: ~p\n", [Q, Min, Max, Bins]),
?debugFmt("Lq: ~p != Rq: ~p\n", [Lq, Rq]),
- ?debugFmt("Xs: ~p\n", [Xs]),
+ ?debugFmt("Xs: ~w\n", [Xs]),
false
end.
-qc_quantile_test() ->
+prop_quantile() ->
%% Loosey-goosey checking of the quantile estimation against R's more precise method.
%%
%% To ensure a minimal level of accuracy, we ensure that we have between 50-200 bins
%% and between 100-500 data points.
%%
%% TODO: Need to nail down the exact error bounds
- P = ?LET({Min, Bins, Xlen, Q}, {choose(1, 99), choose(50, 200), choose(100, 500),
+ %%
+ %% XXX since we try to generate the quantile from the histogram, not the
+ %% original data, our results and Rs don't always agree and this means the
+ %% test will occasionally fail. There's not an easy way to fix this.
+ ?FORALL({Min, Bins, Xlen, Q}, {choose(1, 99), choose(50, 200), choose(100, 500),
choose(0,100)},
?LET(Max, choose(Min+1, 100),
- ?FORALL(Xs, vector(Xlen, choose(Min, Max)),
- qc_quantile_check(Q, Min, Max, Bins, Xs)))),
- true = eqc:quickcheck(P).
+ ?LET(Xs, vector(Xlen, choose(Min, Max)),
+ ?WHENFAIL(
+ begin
+ io:format("Min ~p, Max ~p, Bins ~p, Q ~p, Xs ~w~n",
+ [Min, Max, Bins, Q, Xs]),
+ Command = ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01]),
+ InputStr = [integer_to_list(I) || I <- Xs],
+ io:format(?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
+ io:format(?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command]))
+ end,
+ qc_quantile_check(Q, Min, Max, Bins, Xs))))).
+qc_quantile_test() ->
+ true = eqc:quickcheck(prop_quantile()).
--endif.
+-endif.
-endif.
View
@@ -30,6 +30,10 @@
-include("stats.hrl").
+-ifdef(EQC).
+-export([prop_main/0]).
+-endif.
+
-record(state, { n = 0,
min = 'NaN',
max = 'NaN',
@@ -130,12 +134,14 @@ lists_equal([V1 | R1], [V2 | R2]) ->
false
end.
+prop_main() ->
+ ?FORALL(Xlen, choose(2, 100),
+ ?LET(Xs, vector(Xlen, int()),
+ lists_equal(basho_stats_utils:r_run(Xs,"c(min(x), mean(x), max(x), var(x), sd(x))"),
+ tuple_to_list(summary(update_all(Xs, new())))))).
+
qc_test() ->
- Prop = ?LET(Xlen, choose(2, 100),
- ?FORALL(Xs, vector(Xlen, int()),
- lists_equal(basho_stats_utils:r_run(Xs,"c(min(x), mean(x), max(x), var(x), sd(x))"),
- tuple_to_list(summary(update_all(Xs, new())))))),
- true = eqc:quickcheck(Prop).
+ true = eqc:quickcheck(prop_main()).
-endif.
View
@@ -58,21 +58,43 @@ r_port() ->
end,
%% Check the status of the port
- port_command(Port, "write('', file=stdout())\n"),
- receive
- {Port, {data, {eol, []}}} ->
- {ok, Port};
- {Port, {data, {eol, Other}}} ->
+ try port_command(Port, "write('', file=stdout())\n") of
+ _ ->
+ receive
+ {Port, {data, {eol, []}}} ->
+ {ok, Port};
+ {Port, {data, {eol, Other}}} ->
+ erlang:erase(r_port),
+ {error, Other}
+ end
+ catch
+ error:badarg ->
erlang:erase(r_port),
- {error, Other}
+ {error, port_closed}
end.
r_simple_read_loop(Port, Acc) ->
receive
{Port, {data, {eol, []}}} ->
lists:reverse(Acc);
{Port, {data, {eol, Line}}} ->
- r_simple_read_loop(Port, [to_number(Line) | Acc]);
+ case Line of
+ "Error"++_ ->
+ Error = get_error(Port, [Line]),
+ exit({error, Error});
+ _ ->
+ r_simple_read_loop(Port, [to_number(Line) | Acc])
+ end;
+ {Port, {exit_status, _}} ->
+ lists:reverse(Acc)
+ end.
+
+get_error(Port, Acc) ->
+ receive
+ {Port, {data, {eol, []}}} ->
+ lists:reverse(Acc);
+ {Port, {data, {eol, Line}}} ->
+ get_error(Port, [Line | Acc]);
{Port, {exit_status, _}} ->
lists:reverse(Acc)
end.

0 comments on commit 4ae47ba

Please sign in to comment.