/
densities.py
211 lines (181 loc) · 7.24 KB
/
densities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import numpy as np
from scipy.interpolate import splrep, splev, RectBivariateSpline
class DensitiesError(Exception):
    """Raised for invalid density operations (e.g. contour levels outside the grid)."""
    pass


defaultContours = [0.68, 0.95]


def getContourLevels(inbins, contours=defaultContours, missing_norm=0, half_edge=True):
    """
    Get density contour levels enclosing the given fractions of the probability.

    Works for a bins array of any dimension.

    :param inbins: binned density array
    :param contours: sequence of enclosed-probability fractions to find levels for
    :param missing_norm: probability mass not included in inbins (e.g. points in
        far tails that are outside the binned range)
    :param half_edge: if True, edge bins are only half-weighted in each direction
        (trapezoid-style normalization)
    :return: array of density levels, one per entry in contours
    :raises DensitiesError: if a contour level lies outside the binned range
    """
    contour_levels = np.zeros(len(contours))
    if half_edge:
        abins = inbins.copy()
        # Halve the weight of the first and last slice along every axis.
        lastindices = [-1] + [slice(None, None, None) for _ in abins.shape[1:]]
        firstindices = [0] + [slice(None, None, None) for _ in abins.shape[1:]]
        for _ in abins.shape:
            abins[tuple(lastindices)] /= 2
            abins[tuple(firstindices)] /= 2
            lastindices = np.roll(lastindices, 1)
            firstindices = np.roll(firstindices, 1)
    else:
        abins = inbins
    norm = np.sum(abins)
    # For each contour fraction, the weight that must lie *below* the level.
    targets = (1 - np.array(contours)) * norm - missing_norm
    # Sort by the raw density values, but accumulate the (possibly half-edge
    # weighted) bin weights.
    bins = abins.reshape(-1)
    indexes = inbins.reshape(-1).argsort()
    sortgrid = bins[indexes]
    cumsum = np.cumsum(sortgrid)
    ixs = np.searchsorted(cumsum, targets)
    for i, ix in enumerate(ixs):
        if ix == 0:
            raise DensitiesError("Contour level outside plotted ranges")
        # Linearly interpolate the level between the two bracketing densities.
        h = cumsum[ix] - cumsum[ix - 1]
        d = (cumsum[ix] - targets[i]) / h
        contour_levels[i] = sortgrid[ix] * (1 - d) + d * sortgrid[ix - 1]
    return contour_levels
class GridDensity(object):
    """
    Base class for a probability density on a regular grid.

    Subclasses must set self.axes (list of axis arrays, slowest-varying first),
    self.view_ranges, and provide norm_integral()/integrate().
    """

    def normalize(self, by='integral', in_place=False):
        """
        Normalize the density grid.

        :param by: 'integral' to make the density integrate to unity,
                   'max' to scale so the maximum value is unity
        :param in_place: if True, divide self.P in place; otherwise assign a new array
        :return: self (for chaining)
        :raises DensitiesError: if the norm is zero or 'by' is unknown
        """
        if by == 'integral':
            norm = self.norm_integral()
        elif by == 'max':
            norm = np.max(self.P)
        else:
            raise DensitiesError("Density: unknown normalization")
        if norm == 0:
            # Fixed: previously only the 'max' branch checked for an empty grid;
            # a zero integral silently divided by zero.
            raise DensitiesError('no samples in bin')
        if in_place:
            self.P /= norm
        else:
            self.setP(self.P / norm)
        self.spl = None  # any cached spline is now stale
        return self

    def setP(self, P=None):
        """
        Set the density grid values, checking the shape against self.axes.

        :param P: array of density values, or None to zero-initialize
        :raises DensitiesError: if P's shape does not match the axis sizes
        """
        if P is not None:
            for size, ax in zip(P.shape, self.axes):
                if size != ax.size:
                    raise DensitiesError("Array size mismatch in Density arrays: P %s, axis %s" % (size, ax.size))
            self.P = P
        else:
            self.P = np.zeros([ax.size for ax in self.axes])
        self.spl = None

    def bounds(self):
        """Return bounds in x, y, z... order (axes are stored reversed)."""
        if self.view_ranges is not None:
            return self.view_ranges
        b = [(ax[0], ax[-1]) for ax in self.axes]
        b.reverse()
        return b

    def getContourLevels(self, contours=None):
        """
        Get contour levels enclosing the given fractions of the probability.

        :param contours: sequence of fractions (defaults to defaultContours)
        """
        # None sentinel instead of binding the module-level mutable list at def time
        if contours is None:
            contours = defaultContours
        return getContourLevels(self.P, contours)


class Density1D(GridDensity):
    """One-dimensional density on a uniformly spaced grid."""

    def __init__(self, x, P=None, view_ranges=None):
        """
        :param x: uniformly spaced array of x values
        :param P: array of density values at x (or None for zeros)
        :param view_ranges: optional plotting-range override
        """
        self.n = x.size
        self.axes = [x]
        self.x = x
        self.view_ranges = view_ranges
        self.spacing = x[1] - x[0]  # assumes uniform spacing
        self.setP(P)

    def bounds(self):
        if self.view_ranges is not None:
            return self.view_ranges
        return self.x[0], self.x[-1]

    def _initSpline(self):
        # Exact (s=0) cubic interpolating spline through the grid values
        self.spl = splrep(self.x, self.P, s=0)

    def Prob(self, x, derivative=0):
        """Evaluate the density (or a derivative) at x; zero outside the grid (ext=1)."""
        if self.spl is None:
            self._initSpline()
        if isinstance(x, np.ndarray):
            return splev(x, self.spl, derivative, ext=1)
        return splev([x], self.spl, derivative, ext=1)

    def integrate(self, P):
        # Trapezoid rule on the uniform grid
        return ((P[0] + P[-1]) / 2 + np.sum(P[1:-1])) * self.spacing

    def norm_integral(self):
        return self.integrate(self.P)

    def initLimitGrids(self, factor=None):
        """
        Build a spline-refined grid plus sorted cumulative sums, used by
        getLimits to locate credible-interval density thresholds.

        :param factor: grid refinement factor (default chosen from grid size)
        :return: object with factor, bign, grid, norm, sortgrid, cumsum attributes
        """
        class InterpGrid(object):
            pass

        if self.spl is None:
            self._initSpline()
        g = InterpGrid()
        if factor is None:
            g.factor = max(2, 20000 // self.n)
        else:
            g.factor = factor
        g.bign = (self.n - 1) * g.factor + 1
        vecx = self.x[0] + np.arange(g.bign) * self.spacing / g.factor
        g.grid = splev(vecx, self.spl)
        norm = np.sum(g.grid)
        # Trapezoid correction: the two end points carry half weight
        g.norm = norm - (0.5 * self.P[-1]) - (0.5 * self.P[0])
        g.sortgrid = np.sort(g.grid)
        g.cumsum = np.cumsum(g.sortgrid)
        return g

    def getLimits(self, p, interpGrid=None, accuracy_factor=None):
        """
        Get highest-density interval limits enclosing probability p.

        :param p: probability fraction; a scalar returns a single
            (mn, mx, lim_bot, lim_top) tuple, an ndarray returns a list of tuples
        :param interpGrid: optional precomputed grid from initLimitGrids
        :param accuracy_factor: refinement factor used if interpGrid is None
        :return: lower limit, upper limit, and flags marking limits clipped at
            the grid edges
        """
        g = interpGrid or self.initLimitGrids(accuracy_factor)
        parr = np.atleast_1d(p)
        targets = (1 - parr) * g.norm
        ixs = np.searchsorted(g.cumsum, targets)
        results = []
        for ix, target in zip(ixs, targets):
            trial = g.sortgrid[ix]
            if ix > 0:
                # Interpolate between the bracketing sorted density values.
                # Fixed: previously used sortgrid[ix + 1], which is inconsistent
                # with getContourLevels and can index past the end of the array;
                # the target lies between cumsum[ix - 1] and cumsum[ix], so the
                # level lies between sortgrid[ix - 1] and sortgrid[ix].
                d = g.cumsum[ix] - g.cumsum[ix - 1]
                frac = (g.cumsum[ix] - target) / d
                trial = (1 - frac) * trial + frac * g.sortgrid[ix - 1]
            finespace = self.spacing / g.factor
            lim_bot = (g.grid[0] >= trial)
            if lim_bot:
                mn = self.x[0]
            else:
                # First fine-grid point above the threshold; interpolate back
                i = np.argmax(g.grid > trial)
                d = (g.grid[i] - trial) / (g.grid[i] - g.grid[i - 1])
                mn = self.x[0] + (i - d) * finespace
            lim_top = (g.grid[-1] >= trial)
            if lim_top:
                mx = self.x[-1]
            else:
                # Last fine-grid point above the threshold; interpolate forward
                i = g.bign - np.argmax(g.grid[::-1] > trial) - 1
                d = (g.grid[i] - trial) / (g.grid[i] - g.grid[i + 1])
                mx = self.x[0] + (i + d) * finespace
            if parr is not p:
                # Scalar (or non-ndarray) input: return a single tuple
                return mn, mx, lim_bot, lim_top
            results.append((mn, mx, lim_bot, lim_top))
        return results
class Density2D(GridDensity, RectBivariateSpline):
def __init__(self, x, y, P=None, view_ranges=None):
    """
    :param x: array of x grid values
    :param y: array of y grid values
    :param P: 2D array of density values (or None for zeros)
    :param view_ranges: optional plotting-range override
    """
    self.x = x
    self.y = y
    self.view_ranges = view_ranges
    # axes stored as [y, x] to match P's (row, column) = (y, x) layout
    self.axes = [y, x]
    # cell area; assumes uniform spacing in each direction
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    self.spacing = dx * dy
    self.setP(P)
def integrate(self, P):
    """2D trapezoid-rule integral of grid values P (corners weighted 1/4, edges 1/2)."""
    interior = np.sum(P[1:-1, 1:-1])
    corners = (P[0, 0] + P[0, -1] + P[-1, 0] + P[-1, -1]) / 4.0
    edges = (np.sum(P[1:-1, 0]) + np.sum(P[0, 1:-1])
             + np.sum(P[1:-1, -1]) + np.sum(P[-1, 1:-1])) / 2.0
    return (interior + corners + edges) * self.spacing
def norm_integral(self):
    """Integral of the stored density self.P over the whole grid."""
    return self.integrate(self.P)
def _initSpline(self):
    """Initialize the 2D interpolating spline (the object is its own spline)."""
    # P is transposed because RectBivariateSpline expects z indexed as [x, y],
    # while self.P is stored [y, x]; s=0 gives exact interpolation.
    RectBivariateSpline.__init__(self, self.x, self.y, self.P.T, s=0)
    self.spl = self
def Prob(self, x, y):
    """Evaluate the density at point(s) (x, y) via the cached 2D spline."""
    if self.spl is None: self._initSpline()
    return self.spl.ev(x, y)