/
estimate_beta_dist.py
77 lines (70 loc) · 3.19 KB
/
estimate_beta_dist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Given x_0 (which is the furthest point on a Lorenz curve to the diagonal), compute Alpha and Beta for the underlying Beta distribution of the number of reads in a bin.
import math
from scipy.stats import beta
from scipy.optimize import newton_krylov
from scipy.optimize.nonlin import NoConvergence
import numpy as np
#IxAlpha = 0.4
#IxAlphap1 = 0.3
#total_it = 100
def beta_function(Alpha, Beta):
#print Alpha, Beta
return math.gamma(Alpha)*math.gamma(Beta)/math.gamma(Alpha + Beta)
def get_series(Alpha, Beta, x, total_it, it):
if it < total_it:
if it % 2 == 1:
# odd
m = (it - 1)/2
d_this = -(Alpha + m) * (Alpha + Beta + m) * x / ((Alpha + 2 * m ) * (Alpha + 2 * m + 1))
else:
# even
m = it / 2
d_this = m * (Beta - m ) * x / ((Alpha + 2*m - 1)*(Alpha + 2*m))
return d_this / 1 + get_series(Alpha, Beta, x, total_it, it + 1)
else:
return 0
def get_alpha_beta(x0, y0):
def F(P):
Alpha = P[0]
Beta = P[1]
#x0 = 0.45
#y0 = 0.35
#X = Alpha**2 + Beta**2 - x0
#Y = Alpha**2 - Beta**2 - y0
#x = Alpha / (Alpha + Beta)
#series = 1 / (1 + get_series(Alpha, Beta, x, total_it, 1))
#print series
# approximate so that X and Y are zeros
#X = (x**Alpha) * ((1 - x)**Beta) / (Alpha * beta_function(Alpha, Beta)) * series - x0
#Y = (x**(Alpha + 1)) * ((1 - x)**Beta) / ((Alpha + 1) * beta_function((Alpha + 1), Beta)) * series - y0
#X = ((Alpha/(Alpha+Beta))**Alpha) * ((1 - (Alpha/(Alpha+Beta)))**Beta) / (Alpha * beta_function(Alpha, Beta)) * (1 / (1 + get_series(Alpha, Beta, Alpha/(Alpha+Beta), total_it, 1))) - x0
#Y = ((Alpha/(Alpha+Beta))**(Alpha + 1)) * ((1 - (Alpha/(Alpha+Beta)))**Beta) / ((Alpha + 1) * beta_function((Alpha + 1), Beta)) * (1 / (1 + get_series(Alpha, Beta, Alpha/(Alpha+Beta), total_it, 1))) - y0
X = beta.cdf(float(Alpha)/(Alpha+Beta), Alpha, Beta) - x0
Y = beta.cdf(float(Alpha)/(Alpha+Beta), Alpha+1, Beta) - y0
return [X, Y]
guess = [10, 5]
max_n = 1000
n = 1
while n < max_n:
try:
sol = newton_krylov(F, guess, method = 'lgmres', verbose = 0, rdiff = 0.1, maxiter=50)
break
except NoConvergence as e:
guess = np.random.rand(2) * 10 + 0.1
print(guess)
except ValueError as e:
guess = np.random.rand(2) * 10 + 0.1
print(guess)
n = n + 1
if n == max_n:
print("Error: the chosen value is not within the computation range. Please re-choose. Suggested value: 0.3 <= x, y <= 0.5, and x >= y")
return [0, 0]
else:
return sol
def get_beta(u, v):
return get_alpha_beta(u, v)
#Alpha = guess[0]
#Beta = guess[1]
#X = ((Alpha/(Alpha+Beta))**Alpha) * ((1 - (Alpha/(Alpha+Beta)))**Beta) / (Alpha * beta_function(Alpha, Beta)) * (1 / (1 + get_series(Alpha, Beta, Alpha/(Alpha+Beta), total_it, 1)))
#Y = ((Alpha/(Alpha+Beta))**(Alpha + 1)) * ((1 - (Alpha/(Alpha+Beta)))**Beta) / ((Alpha + 1) * beta_function((Alpha + 1), Beta)) * (1 / (1 + get_series((Alpha+1), Beta, Alpha/(Alpha+Beta), total_it, 1)))
#print X, Y