Skip to content
Permalink
Browse files
merge bear and bear_scutil
  • Loading branch information
joewilliams committed Mar 30, 2012
1 parent eb22734 commit 7c9fc5f16315b6e23543817c10ba641f85c10e64
Showing 2 changed files with 66 additions and 76 deletions.
@@ -240,7 +240,7 @@ get_kendall_correlation(_, Values) when length(Values) < ?STATS_MIN ->
get_kendall_correlation(Values1, Values2) when length(Values1) /= length(Values2) ->
0.0;
get_kendall_correlation(Values1, Values2) ->
bear_scutil:kendall_correlation(Values1, Values2).
bear:kendall_correlation(Values1, Values2).

get_spearman_correlation(Values, _) when length(Values) < ?STATS_MIN ->
0.0;
@@ -370,3 +370,68 @@ get_bin_width(StdDev, Count) ->
%% Number of bins of the given Width needed to span Min..Max: the span is
%% divided by Width, rounded to the nearest integer, and one extra bin is
%% added.
get_bin_count(Min, Max, Width) ->
    Span = Max - Min,
    round(Span / Width) + 1.

%% taken from http://crunchyd.com/scutil/
%% All code here is MIT Licensed
%% http://scutil.com/license.html

% seems to match the value returned by the 'cor' (method="kendall") R function
% http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient
%% Kendall rank correlation coefficient of two lists.
%%
%% Both lists are converted to tied ranks (in input order) and paired up.
%% The pairs are sorted by the first list's rank, and P is the total, over
%% every position in the resulting second-rank sequence, of the number of
%% later ranks that are strictly smaller. The result is
%% -((4P / (N * (N - 1))) - 1), where N is the length of List1.
%%
%% Raises badarith when N is 0 or 1, because N * (N - 1) is then zero;
%% lists:zip/2 fails when the two lists differ in length.
kendall_correlation(List1, List2) when is_list(List1), is_list(List2) ->
    RanksA = [Rank || {Rank, _} <- tied_ordered_ranking(List1)],
    RanksB = [Rank || {Rank, _} <- tied_ordered_ranking(List2)],

    SortedPairs = lists:keysort(1, lists:zip(RanksA, RanksB)),
    OrdB = [B || {_, B} <- SortedPairs],

    N = length(List1),
    P = lists:sum(kendall_right_of(OrdB, [])),

    Ratio = (4 * P) / (N * (N - 1)),
    -(Ratio - 1).

%% Pairs each element of List with its 1-based position in descending sort
%% order: returns [{1, Largest}, {2, NextLargest}, ...]. Equal values get
%% distinct consecutive positions here.
simple_ranking(List) when is_list(List) ->
    Positions = lists:seq(1, length(List)),
    Descending = lists:reverse(lists:sort(List)),
    lists:zip(Positions, Descending).

%% Ranks List in descending order via simple_ranking/1, then hands the
%% ranked pairs to tied_rank_worker/3, which gives every run of equal
%% values the mean of the positions that run occupies.
tied_ranking(List) ->
    Ranked = simple_ranking(List),
    tied_rank_worker(Ranked, [], no_prev_value).

%% Tied ranking of List, returned as {Rank, Value} pairs in the same order
%% as the elements of List (rather than in sorted order).
tied_ordered_ranking(List) when is_list(List) ->
    Ranks = tied_ranking(List),
    tied_ordered_ranking(List, Ranks, []).

%% Walks the original list front to back. For each element, the first
%% {Rank, Value} pair in Ranks whose value matches is taken out of Ranks and
%% pushed onto Work. Once both the list and Ranks are exhausted, Work is
%% reversed so the pairs line up with the original element order.
tied_ordered_ranking([], [], Work) ->
    lists:reverse(Work);
tied_ordered_ranking([Front | Rem], Ranks, Work) ->
    %% Matching against Front asserts the pair found really carries this value.
    {_IRank, Front} = Item = lists:keyfind(Front, 2, Ranks),
    tied_ordered_ranking(Rem, lists:delete(Item, Ranks), [Item | Work]).

%% For every element of the list, computes kendall_right_of_item/2 against
%% the elements that follow it. Counts are accumulated in reverse and
%% flipped at the end, so the result is in the same order as the input.
kendall_right_of([], Counts) ->
    lists:reverse(Counts);
kendall_right_of([Head | Tail], Counts) ->
    Count = kendall_right_of_item(Head, Tail),
    kendall_right_of(Tail, [Count | Counts]).

%% Counts how many elements of Rem are strictly smaller than B.
kendall_right_of_item(B, Rem) ->
    lists:sum([1 || Other <- Rem, Other < B]).

%% Flushes a finished run of equal values onto Work. FoundAt holds the
%% positions the run occupied; each occurrence is emitted as
%% {MeanOfPositions, NewValue}, one tuple per position.
tied_add_prev(Work, {FoundAt, NewValue}) ->
    Count = length(FoundAt),
    MeanRank = lists:sum(FoundAt) / Count,
    lists:duplicate(Count, {MeanRank, NewValue}) ++ Work.

%% Collapses a list of {Position, Value} pairs, in which equal values are
%% adjacent, into tied ranks.
%%
%% Arguments:
%%   1. the remaining {Position, Value} pairs;
%%   2. Work - the output built so far, in reverse order;
%%   3. the current run: either no_prev_value (nothing seen yet) or
%%      {FoundAt, Value}, where FoundAt lists the positions at which Value
%%      has been seen so far.
%%
%% Each time the value changes (or input ends) the run is flushed through
%% tied_add_prev/2, which emits one {MeanPosition, Value} per occurrence.
%% The final result is Work reversed.
%%
%% An empty input with no run in progress now returns the reversed Work
%% (i.e. [] from tied_ranking([])). Previously it fell through to
%% tied_add_prev(Work, no_prev_value), which has no matching clause.
tied_rank_worker([], Work, no_prev_value) ->
    lists:reverse(Work);
tied_rank_worker([], Work, PrevValue) ->
    lists:reverse(tied_add_prev(Work, PrevValue));
%% First element: open the initial run.
tied_rank_worker([{BaseRank, BaseVal} | Remainder], Work, no_prev_value) ->
    tied_rank_worker(Remainder, Work, {[BaseRank], BaseVal});
%% Same value as the current run (OldVal is matched in both arguments):
%% extend the run with this position.
tied_rank_worker([{Id, OldVal} | Remainder], Work, {FoundAt, OldVal}) ->
    tied_rank_worker(Remainder, Work, {[Id | FoundAt], OldVal});
%% Different value: flush the finished run and open a new one.
tied_rank_worker([{Id, NewVal} | Remainder], Work, {_FoundAt, _OldVal} = PrevValue) ->
    tied_rank_worker(Remainder, tied_add_prev(Work, PrevValue), {[Id], NewVal}).

This file was deleted.

0 comments on commit 7c9fc5f

Please sign in to comment.