-
Notifications
You must be signed in to change notification settings - Fork 10
/
fit_disease_model.py
156 lines (127 loc) · 5.11 KB
/
fit_disease_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import inspect
import numpy as np
import pymc as mc
import probabilistic_utils
import beta_binomial_rate as rate_model
MCMC_PARAMS = {
'most accurate': [500, 20, 10000],
'testing fast': [5, 5, 5],
}
MAP_PARAMS = {
'most accurate': [ 1500, 'fmin_powell'],
'try powells method': [ 100, 'fmin_powell'],
'testing fast': [ 1, 'fmin' ],
}
def map_fit(dm, speed='most accurate'):
"""
tuck the pymc vars into the disease_model.vars, if they don't
exist and then fit them with map
"""
setup_disease_model(dm)
map = mc.MAP(dm.vars)
print "searching for maximum likelihood point estimate (%s)" % speed
iterlim, method = MAP_PARAMS[speed]
map.fit(verbose=10, iterlim=iterlim, method=method)
for rf in [dm.i, dm.r, dm.p, dm.f, dm.X]:
rf.notes += '\nFit by disease model %d' % dm.id
probabilistic_utils.save_map(dm.i)
probabilistic_utils.save_map(dm.r)
probabilistic_utils.save_map(dm.f)
probabilistic_utils.save_map(dm.p)
probabilistic_utils.save_map(dm.X)
def mcmc_fit(dm, speed='most accurate'):
#speed = 'testing fast'
map_fit(dm, speed)
# clear any data from previous mcmc fit
# TODO: refactor this, or really switch to an open notebook
# approach, where we save _all_ fits
for rf in [dm.i, dm.r, dm.p, dm.f]:
rf.fit.pop('mcmc_mean', '')
rf.fit.pop('mcmc_median', '')
rf.fit.pop('mcmc_upper_cl', '')
rf.fit.pop('mcmc_lower_cl', '')
rf.save()
trace_len, thin, burn = MCMC_PARAMS[speed]
mcmc = mc.MCMC(dm.vars)
# TODO: refactor the step method setup code into the rate_model
for rf in [dm.i, dm.r, dm.p, dm.f]:
try:
mcmc.use_step_method(mc.AdaptiveMetropolis, rf.vars['beta_binom_stochs'], verbose=0)
except (KeyError, ValueError):
pass
mcmc.sample(trace_len*thin+burn, burn, thin, verbose=1)
probabilistic_utils.save_mcmc(dm.i)
probabilistic_utils.save_mcmc(dm.r)
probabilistic_utils.save_mcmc(dm.f)
probabilistic_utils.save_mcmc(dm.p)
probabilistic_utils.save_mcmc(dm.X)
def initialized_rate_vars(rf, rate_stoch=None):
# store the rate model code in the asrf for future reference
try:
rf.fit['rate_model'] = inspect.getsource(rate_model)
except:
rf.fit['rate_model'] = 'WARNING: could not find source code'
rf.fit['out_age_mesh'] = range(probabilistic_utils.MAX_AGE)
# do normal approximation first, to generate a good starting point
M,C = probabilistic_utils.normal_approx(rf)
rate_model.setup_rate_model(rf, rate_stoch)
return probabilistic_utils.flatten(rf.vars.values()), rf.rate_stoch
#################### Code for generating a hierarchical bayesian model of the asrf interactions
def setup_disease_model(dm):
"""
Generate a list of the PyMC variables for the generic disease
model, and store it as dm.vars
See comments in the code for exact details on the model.
"""
dm.vars = {}
out_age_mesh = range(probabilistic_utils.MAX_AGE)
dm.vars['i'], i = initialized_rate_vars(dm.i_in())
dm.vars['r'], r = initialized_rate_vars(dm.r_in())
dm.vars['f'], f = initialized_rate_vars(dm.f_in())
m = probabilistic_utils.mortality_for(dm, out_age_mesh)
# TODO: make error in C_0 a semi-informative stochastic variable
logit_C_0 = mc.Normal('logit(C_0)', 0., 1.e-2)
@mc.deterministic
def C_0(logit_C_0=logit_C_0):
return mc.invlogit(logit_C_0)
@mc.deterministic
def S_0(C_0=C_0):
return max(0.0, 1.0 - C_0)
dm.vars['bins'] = [S_0, C_0, logit_C_0]
# iterative solution to difference equations to obtain bin sizes for all ages
@mc.deterministic
def S_C_D_M(S_0=S_0, C_0=C_0, i=i, r=r, f=f, m=m):
S = np.zeros(len(out_age_mesh))
C = np.zeros(len(out_age_mesh))
D = np.zeros(len(out_age_mesh))
M = np.zeros(len(out_age_mesh))
S[0] = S_0
C[0] = C_0
D[0] = 0.0
M[0] = 0.0
for a in range(len(out_age_mesh)-1):
S[a+1] = S[a]*(1-i[a]-m[a]) + C[a]*r[a]
C[a+1] = S[a]*i[a] + C[a]*(1-r[a]-m[a]-f[a])
D[a+1] = C[a]*f[a] + D[a]
M[a+1] = S[a]*m[a] + C[a]*m[a] + M[a]
return S,C,D,M
dm.vars['bins'] += [S_C_D_M]
# prevalence = # with condition / (# with condition + # without)
@mc.deterministic
def p(S_C_D_M=S_C_D_M, tau_p=1./.01**2):
S,C,D,M = S_C_D_M
return probabilistic_utils.trim(C/(S+C),
probabilistic_utils.NEARLY_ZERO,
1. - probabilistic_utils.NEARLY_ZERO)
dm.vars['p'], p = initialized_rate_vars(dm.p_in(), rate_stoch=p)
# duration = E[time in bin C]
@mc.deterministic
def X(r=r, m=m, f=f):
pr_exit = 1 - r - m - f
X = np.empty(len(pr_exit))
t = 1.0
for i in xrange(len(X)-1,-1,-1):
X[i] = t*pr_exit[i]
t = 1+X[i]
return X
dm.vars['X'], X = initialized_rate_vars(dm.X_in(), rate_stoch=X)