Initial commit

commit a26fc1f2506a4f7bc5a82830b771854211a0682a (0 parents)
@dizzyd authored
1  .hgignore
@@ -0,0 +1 @@
+.beam
11 ebin/stats.app
@@ -0,0 +1,11 @@
+{application, stats,
+ [{description, "Erlang Statistics Library"},
+  {vsn, "1"},
+  {modules, [ stats_sample,
+              stats_histogram ]},
+  {registered, []},
+  {applications, [kernel,
+                  stdlib,
+                  sasl]},
+  {env, []}
+ ]}.
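Since the resource file declares no mod callback, stats is a plain library application: loading it just registers the metadata above and the two modules with the application controller. A minimal shell sketch, assuming ebin/ is on the code path (illustrative, not part of this commit):

    %% Started with: erl -pa ebin
    ok = application:load(stats),                      %% reads ebin/stats.app
    {ok, Mods} = application:get_key(stats, modules).  %% [stats_sample, stats_histogram]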
124 src/stats_histogram.erl
@@ -0,0 +1,124 @@
+%% -------------------------------------------------------------------
+%%
+%% stats: Statistics Suite for Erlang
+%%
+%% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com)
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(stats_histogram).
+
+-export([new/3,
+         update/2,
+         quantile/2]).
+
+-record(hist, { n = 0,
+                min,
+                max,
+                bin_scale,
+                bins,
+                capacity }).
+
+%% ===================================================================
+%% Public API
+%% ===================================================================
+
+new(MinVal, MaxVal, NumBins) ->
+    #hist { min = MinVal,
+            max = MaxVal,
+            bin_scale = NumBins / (MaxVal - MinVal),
+            bins = gb_trees:empty(),
+            capacity = NumBins }.
+
+%%
+%% Update the histogram with a new observation.
+%%
+%% NOTE: update/2 clamps values to the range [#hist.min, #hist.max];
+%% an observation outside those boundaries is counted in the first or
+%% last bin, respectively, which skews the histogram.
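+%%
+%% For example, with new(0, 100, 10), update(250, H) is counted in the
+%% last bin and update(-5, H) in the first bin.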
+%%
+update(Value, Hist) ->
+    Bin = which_bin(Value, Hist),
+    %% A bin that has not been observed yet starts from a count of zero.
+    Counter = case gb_trees:lookup(Bin, Hist#hist.bins) of
+                  {value, C} -> C;
+                  none       -> 0
+              end,
+    Hist#hist { n = Hist#hist.n + 1,
+                bins = gb_trees:enter(Bin, Counter + 1, Hist#hist.bins) }.
+
+
+%%
+%% Estimate the quantile from the histogram. Quantile should be a value
+%% between 0 and 1. Returns 'NaN' if the histogram is currently empty.
+%%
+quantile(_Quantile, #hist { n = 0 }) ->
+    'NaN';
+quantile(Quantile, Hist)
+  when Quantile > 0, Quantile < 1 ->
+    %% Sort out how many samples we need to satisfy the requested quantile
+    MaxSamples = trunc(Quantile * Hist#hist.n),
+
+    %% Now iterate over the bins, until we have gathered enough samples
+    %% to satisfy the request. The resulting bin is an estimate.
+    Itr = gb_trees:iterator(Hist#hist.bins),
+    case quantile_itr(gb_trees:next(Itr), 0, MaxSamples) of
+        max ->
+            Hist#hist.max;
+        EstBin ->
+            %% We have an estimated bin -- undo the scaling applied in
+            %% which_bin/2 to recover the lower bound of said bin.
+            (EstBin / Hist#hist.bin_scale) + Hist#hist.min
+    end.
+
+
+%% ===================================================================
+%% Internal functions
+%% ===================================================================
+
+which_bin(Value, Hist) ->
+    %% Shift the value to the start of the range before scaling, then
+    %% clamp the resulting index into [0, capacity - 1].
+    Bin = trunc((Value - Hist#hist.min) * Hist#hist.bin_scale),
+    if
+        Bin < 0 ->
+            0;
+        Bin >= Hist#hist.capacity ->
+            Hist#hist.capacity - 1;
+        true ->
+            Bin
+    end.
+
+
+quantile_itr(none, _Samples, _MaxSamples) ->
+    max;
+quantile_itr({Bin, Counter, Itr2}, Samples, MaxSamples) ->
+    Samples2 = Samples + Counter,
+    if
+        Samples2 < MaxSamples ->
+            %% Not done yet, move to next bin
+            quantile_itr(gb_trees:next(Itr2), Samples2, MaxSamples);
+        Samples2 == MaxSamples ->
+            %% This bin (in its entirety) satisfies our criteria
+            Bin + 1;
+        true ->
+            %% We only need some of the samples in this bin; we make
+            %% the assumption that values within the bin are uniformly
+            %% distributed.
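+            %% For example, if this bin holds 10 samples and only 4 more
+            %% are needed to reach MaxSamples, the result is Bin + 0.4.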
+            Bin + ((MaxSamples - Samples) / Counter)
+    end.
+
+
+
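Taken together, the histogram workflow is: create a histogram over a known range with new/3, fold observations in with update/2, and read back an estimate with quantile/2. A minimal sketch of that flow (the demo module name and sample values are illustrative assumptions, not part of this commit):

    %% Hypothetical demo module, not part of the commit.
    -module(stats_histogram_demo).
    -export([median/0]).

    median() ->
        %% 10 bins spanning the range [0, 100].
        H0 = stats_histogram:new(0, 100, 10),
        %% lists:foldl/3 threads the histogram through each observation.
        H1 = lists:foldl(fun stats_histogram:update/2, H0, [1, 5, 42, 42, 87, 99]),
        %% Estimate the median: the lower bound of the bin that holds it,
        %% plus a uniform-distribution interpolation inside that bin.
        stats_histogram:quantile(0.5, H1).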
81 src/stats_sample.erl
@@ -0,0 +1,81 @@
+%% -------------------------------------------------------------------
+%%
+%% stats: Statistics Suite for Erlang
+%%
+%% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com)
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(stats_sample).
+
+-export([new/0,
+         update/2,
+         count/1,
+         min/1, mean/1, max/1,
+         variance/1, sdev/1]).
+
+-record(state, { n = 0,
+                 min,
+                 max,
+                 sum = 0,
+                 sum2 = 0 }).
+
+
+%% ===================================================================
+%% Public API
+%% ===================================================================
+
+new() ->
+    #state{}.
+
+update(Value, State = #state{ n = 0 }) ->
+    %% Seed min/max with the first observation; the record defaults are
+    %% 'undefined', which erlang:min/2 and erlang:max/2 cannot compare
+    %% meaningfully against a number.
+    State#state { n = 1, min = Value, max = Value,
+                  sum = Value, sum2 = Value * Value };
+update(Value, State) ->
+    State#state {
+      n    = State#state.n + 1,
+      min  = erlang:min(Value, State#state.min),
+      max  = erlang:max(Value, State#state.max),
+      sum  = State#state.sum + Value,
+      sum2 = State#state.sum2 + (Value * Value)}.
+
+
+count(State) ->
+    State#state.n.
+
+min(State) ->
+    State#state.min.
+
+mean(#state{n = 0}) ->
+    'NaN';
+mean(State) ->
+    State#state.sum / State#state.n.
+
+max(State) ->
+    State#state.max.
+
+variance(#state { n = N }) when N < 2 ->
+    'NaN';
+variance(State) ->
+    SumSq = State#state.sum * State#state.sum,
+    (State#state.sum2 - (SumSq / State#state.n)) / (State#state.n - 1).
+
+
+sdev(State) ->
+    case variance(State) of
+        'NaN' ->
+            'NaN';
+        Value ->
+            math:sqrt(Value)
+    end.
+
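The sample accumulator follows the same fold-and-query shape: new/0 creates an empty #state{}, update/2 folds observations in, and count/1, min/1, max/1, mean/1, variance/1, and sdev/1 read the summary statistics back out. A minimal sketch (the demo module name and data set are illustrative assumptions, not part of this commit):

    %% Hypothetical demo module, not part of the commit.
    -module(stats_sample_demo).
    -export([summary/0]).

    summary() ->
        Data = [2, 4, 4, 4, 5, 5, 7, 9],
        S = lists:foldl(fun stats_sample:update/2, stats_sample:new(), Data),
        %% For this data set mean/1 is 5.0; variance/1 is the unbiased
        %% (n - 1) sample variance, 32 / 7, and sdev/1 is its square root.
        {stats_sample:count(S), stats_sample:min(S), stats_sample:max(S),
         stats_sample:mean(S), stats_sample:variance(S), stats_sample:sdev(S)}.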