/
binning_rules.jl
132 lines (92 loc) · 3.36 KB
/
binning_rules.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
    abstract type BinningAlgorithm

Abstract supertype for binning algorithms.
"""
abstract type BinningAlgorithm end  # `_get_binedges` dispatches `_get_bining_impl` on subtypes of this
export BinningAlgorithm
# ToDo: Create a public API function for this:
#
# Compute histogram bin edges for every vector in `marginal_points`.
#
# Arguments:
# * `marginal_points`: tuple of `N` sample vectors; the sample count is taken
#   from the first vector.
# * `algorithm`: binning rule; the number of bins for each vector `v` is
#   obtained from `_get_bining_impl(algorithm, n_samples, nd, v)`.
# * `::BATContext`: accepted but not used in this function.
#
# Returns a tuple with one range per input vector, running from that vector's
# minimum to its maximum with `nbins + 1` equally spaced edges.
function _get_binedges(marginal_points::NTuple{N,AbstractVector}, algorithm::BinningAlgorithm, ::BATContext) where N
    vs = marginal_points
    n_samples = length(LinearIndices(first(vs)))

    # The nd estimator is the key to most automatic binning methods, and is
    # modified for two-dimensional histograms to include correlation. The
    # >2-dimensional case does not have a nice solution to correlations.
    nd = n_samples^(1/(2+N))
    if N == 2
        rho = cor(first(vs), last(vs))
        # `cor` returns NaN when the correlation is undefined (e.g. a constant
        # marginal). `min(nd, NaN)` would then turn `nd` into NaN and make the
        # downstream integer conversion throw, so keep the plain estimate.
        # NOTE(review): (1 - rho^2)^(3//8) is at most 1, so the corrected value
        # is never below `nd` and this `min` always yields `nd` — confirm
        # whether capping at the uncorrected estimate is intended.
        if !isnan(rho)
            nd = min(nd, nd / (1 - rho^2)^(3//8))
        end
    end

    return map(vs) do v
        nbins::Int = _get_bining_impl(algorithm, n_samples, nd, v)
        lo, hi = extrema(v)
        range(lo, hi, length = nbins + 1)
    end
end
# Upper limit on the number of bins an automatic binning rule may select.
const _max_auto_n_bins = 10_000

# Round a raw, real-valued bin-count estimate `x` up to an `Int` between 1 and
# `_max_auto_n_bins`.
#
# The limits are applied before the integer conversion, so estimates that are
# infinite or larger than `typemax(Int)` are capped instead of raising an
# `InexactError`. A NaN estimate (undefined, e.g. 0/0) falls back to one bin.
function _autobinning_cl(x)
    isnan(x) && return 1
    return ceil(Int, clamp(x, one(x), oftype(x, _max_auto_n_bins)))
end
"""
    FixedNBins(nbins::Int)

Selects a fixed number of bins.

Constructor: `$(FUNCTIONNAME)(; fields...)`

Fields:

$(TYPEDFIELDS)
"""
@with_kw struct FixedNBins <: BinningAlgorithm
    # Number of bins returned for every marginal, independent of the samples.
    nbins::Int = 200
end
export FixedNBins

# Binning rule for `FixedNBins`: return the configured bin count. The sample
# count, the `nd` estimator and the sample values are ignored.
#
# This must be a method of `_get_bining_impl`, the function `_get_binedges`
# calls; defined only under another name, `FixedNBins` raises a `MethodError`.
function _get_bining_impl(algorithm::FixedNBins, n_samples, nd, v)
    return algorithm.nbins
end

# Previous name of the rule above, kept as a forwarding alias so existing
# callers continue to work.
_binning_rule_impl(algorithm::FixedNBins, n_samples, nd, v) =
    _get_bining_impl(algorithm, n_samples, nd, v)
"""
    struct SquareRootBinning <: BinningAlgorithm

Selects automatic binning based on the
[Square-root choice](https://en.wikipedia.org/wiki/Histogram#Square-root_choice).

Constructor: `SquareRootBinning()`
"""
struct SquareRootBinning <: BinningAlgorithm end
export SquareRootBinning

# Square-root choice: the raw bin count is the square root of the number of
# samples; `nd` and the sample values `v` are not used by this rule.
function _get_bining_impl(::SquareRootBinning, n_samples, nd, v)
    root_n = sqrt(n_samples)
    return _autobinning_cl(root_n)
end
"""
    struct SturgesBinning <: BinningAlgorithm

Selects automatic binning based on
[Sturges' formula](https://en.wikipedia.org/wiki/Histogram#Sturges'_formula).

Constructor: `SturgesBinning()`
"""
struct SturgesBinning <: BinningAlgorithm end
export SturgesBinning

# Sturges' formula: the raw bin count is `log2(n_samples) + 1`; `nd` and the
# sample values `v` are not used by this rule.
function _get_bining_impl(::SturgesBinning, n_samples, nd, v)
    raw_count = log2(n_samples) + 1
    return _autobinning_cl(raw_count)
end
"""
    struct RiceBinning <: BinningAlgorithm

Selects automatic binning based on the
[Rice rule](https://en.wikipedia.org/wiki/Histogram#Rice_Rule).

Constructor: `RiceBinning()`
"""
struct RiceBinning <: BinningAlgorithm end
export RiceBinning

# Rice rule: the raw bin count is twice the `nd` estimator supplied by the
# caller; the sample count and sample values are not used directly.
_get_bining_impl(::RiceBinning, n_samples, nd, v) = _autobinning_cl(2 * nd)
"""
    struct ScottBinning <: BinningAlgorithm

Selects automatic binning based on
[Scott's normal reference rule](https://en.wikipedia.org/wiki/Histogram#Scott's_normal_reference_rule).

Constructor: `ScottBinning()`
"""
struct ScottBinning <: BinningAlgorithm end
export ScottBinning

# Scott's normal reference rule: the bin width is `3.5 * std(v) / nd`, and the
# raw bin count is the sample range of `v` divided by that width.
function _get_bining_impl(::ScottBinning, n_samples, nd, v)
    lo, hi = extrema(v)
    sigma = std(v)
    # `std` is 0 when all samples are equal and NaN for a single sample; the
    # quotient would then be 0/0 = NaN, which cannot be converted to a bin
    # count. Fall back to a unit spread, as the Freedman–Diaconis rule does
    # for a non-positive interquartile range.
    spread = sigma > 0 ? sigma : oftype(sigma, 1)
    return _autobinning_cl((hi - lo) / (3.5 * spread / nd))
end
"""
    struct FreedmanDiaconisBinning <: BinningAlgorithm

Selects automatic binning based on the
[Freedman–Diaconis](https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule) rule.

Constructor: `FreedmanDiaconisBinning()`
"""
struct FreedmanDiaconisBinning <: BinningAlgorithm end
export FreedmanDiaconisBinning

# Freedman–Diaconis rule: the bin width is `2 * IQR(v) / nd`, and the raw bin
# count is the sample range of `v` divided by that width.
function _get_bining_impl(::FreedmanDiaconisBinning, n_samples, nd, v)
    span = maximum(v) - minimum(v)
    iqr = quantile(v, 0.75) - quantile(v, 0.25)
    # A non-positive (or NaN) interquartile range is replaced by 1 so the
    # division below stays well defined.
    if !(iqr > 0)
        iqr = oftype(iqr, 1)
    end
    return _autobinning_cl(span / (2 * iqr / nd))
end