/
test_stats.py
167 lines (143 loc) · 7.18 KB
/
test_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# ====================================================================================== #
# Testing module for helper functions with statistical analysis of data.
# Author: Eddie Lee, edlee@alumni.princeton.edu
# ====================================================================================== #
from .stats import *
from scipy.integrate import quad
ALPHA=1.5
def test_ECDF():
x = np.arange(10)
print(ECDF(x, conf_interval=(5,95)))
def test_call_format():
pl = PowerLaw(alpha=1.5, lower_bound=1.111, upper_bound=10.111)
assert pl.alpha==1.5 and pl.lower_bound==1.111 and pl.upper_bound==10.111
dpl = DiscretePowerLaw(alpha=1.5, lower_bound=1.111, upper_bound=10.111)
assert dpl.alpha==1.5 and dpl.lower_bound==1.111 and dpl.upper_bound==10.111
def test_normalization():
# check normalization of the log likelihood
X=np.arange(1,1001)
np.isclose(np.exp(DiscretePowerLaw.log_likelihood(X, 2,
lower_bound=1,
upper_bound=1000,
return_sum=False,
normalize=True)).sum(), 1)
X=np.arange(10,1001)
np.isclose(np.exp(DiscretePowerLaw.log_likelihood(X, 2,
lower_bound=10,
upper_bound=1000,
return_sum=False,
normalize=True)).sum(), 1)
# alpha=3/2
X=np.arange(1,1001)
totalp=np.exp(DiscretePowerLaw.log_likelihood(X, 1.5,
lower_bound=1,
upper_bound=1000,
return_sum=False,
normalize=True)).sum()
assert np.isclose(totalp, 1), totalp
# change lower bound
X=np.arange(10,1001)
totalp=np.exp(DiscretePowerLaw.log_likelihood(X, 1.5,
lower_bound=10,
upper_bound=1000,
return_sum=False,
normalize=True)).sum()
assert np.isclose(totalp, 1), totalp
# check normalization of pdf
# don't specify lower bound (None gets passed to zeta function equivalent to passing 1)
X=np.arange(1,1001)
totalp=DiscretePowerLaw.pdf(1.5,
upper_bound=1000)(X).sum()
assert np.isclose(totalp, 1), totalp
X=np.arange(10,1001)
totalp=DiscretePowerLaw.pdf(1.5,
lower_bound=10,
upper_bound=1000)(X).sum()
assert np.isclose(totalp, 1), totalp
X=np.arange(10,100001)
totalp=DiscretePowerLaw.pdf(1.5,
lower_bound=10,
upper_bound=X.max())(X).sum()
assert np.isclose(totalp, 1), totalp
# log likelihood: compare pdf calculation with log_likelihood and _log_likelihood
logp=np.log( DiscretePowerLaw.pdf(1.5,
lower_bound=10,
upper_bound=X.max())(X) )
logL=DiscretePowerLaw.log_likelihood(X, 1.5,
lower_bound=10,
upper_bound=X.max(),
normalize=True,
return_sum=False)
logLquick=DiscretePowerLaw._log_likelihood(X, 1.5,
10,
X.max())
assert np.isclose(logp, logL).all()
assert np.isclose(logLquick, logL.sum())
def test_cdf_with_pdf():
x = np.array(list(range(1,501)))
cdfFromPdf = np.cumsum(DiscretePowerLaw.pdf(alpha=ALPHA, lower_bound=1, upper_bound=500)(x))
cdf = DiscretePowerLaw.cdf(alpha=ALPHA, lower_bound=1, upper_bound=500)(x)
gen = DiscretePowerLaw.cdf_as_generator(alpha=ALPHA, lower_bound=1, upper_bound=500)
cdfFromGen = [next(gen) for i in x]
assert np.isclose(cdfFromPdf, cdf).all()
assert np.isclose(cdfFromPdf, cdfFromGen).all()
def test_pdf():
# check that pdf decays correctly (relative probabilities)
for a in np.arange(1.5,5.5,.5):
p = DiscretePowerLaw.pdf(a, 1)(2**np.arange(30))
assert np.isclose( np.diff(np.log(p)), np.log(2**-a) ).all()
def test_max_likelihood_flow():
"""Make sure returned estimates are what I've found before."""
# continuous
X = PowerLaw.rvs(size=1000, rng=np.random.RandomState(0))
alphaML = PowerLaw.max_likelihood(X)
assert alphaML==1.997014843072137
alphaML, lb = PowerLaw.max_likelihood(X, lower_bound_range=(1,10), initial_guess=1.76)
assert alphaML==1.9582463655267062 and lb==1.7
# d = ExpTruncPowerLaw(2, 1e-3, rng=np.random.RandomState(0))
# X = d.rvs(size=1000)
# alphaML, elML = d.max_likelihood(X)
# assert abs(2-alphaML)<1e-2 and abs(1e-3-elML)<2e-3, (alphaML, elML)
#
# d = ExpTruncPowerLaw(1.75, 1e-3, rng=np.random.RandomState(0))
# X = d.rvs(size=1000)
# alphaML, elML = d.max_likelihood(X)
# assert abs(1.75-alphaML)<1e-2 and abs(1e-3-elML)<2e-3, (alphaML, elML)
# discrete
X = DiscretePowerLaw.rvs(2., size=1000, rng=np.random.RandomState(0))
alphaML = DiscretePowerLaw.max_likelihood(X)
assert alphaML==1.9939545133636511
alphaML, lb = DiscretePowerLaw.max_likelihood(X, lower_bound_range=(1,10), initial_guess=1.76)
assert alphaML==1.9939545644486907 and lb==1
def test_ExpTruncPowerLaw():
d = ExpTruncPowerLaw(2, 1e-3, rng=np.random.RandomState(0))
assert np.isclose(quad(d.pdf(), 1, np.inf)[0], 1)
X = d.rvs(size=1000)
assert np.isclose([2,1e-3], d.max_likelihood(X), atol=2e-3, rtol=0).all()
print("Test passed: probability distribution is normalized.")
dpl = ExpTruncPowerLaw(2., el=.001, rng=np.random.RandomState(0))
y = dpl.rvs(100_000)
soln = dpl.max_likelihood(y)
assert abs(soln[0] - 2)<=1e-2 and abs(soln[1] - 1e-3)<=5e-4
dpl = ExpTruncPowerLaw(1.5,
el=.001,
rng=np.random.RandomState(0),
lower_bound=10)
y = dpl.rvs(100_000)
soln = dpl.max_likelihood(y)
assert abs(soln[0] - 1.5)<=1e-2 and abs(soln[1] - 1e-3)<=5e-4
print("Test passed: Max likelihood recovers correct parameters.")
def test_ExpTruncDiscretePowerLaw():
dpl = ExpTruncDiscretePowerLaw(2., el=.001, rng=np.random.RandomState(0))
y = dpl.rvs(100_000)
soln = dpl.max_likelihood(y)
assert abs(soln[0] - 2)<=1e-2 and abs(soln[1] - 1e-3)<=5e-4
print("Test passed: Max likelihood recovers correct parameters.")
dpl = ExpTruncDiscretePowerLaw(1.5,
el=.001,
rng=np.random.RandomState(0),
lower_bound=10)
y = dpl.rvs(100_000)
soln = dpl.max_likelihood(y)
assert abs(soln[0] - 1.5)<=1e-2 and abs(soln[1] - 1e-3)<=5e-4
print("Test passed: Max likelihood recovers correct parameters.")