-
Notifications
You must be signed in to change notification settings - Fork 10
/
model_utils.py
145 lines (118 loc) · 5.61 KB
/
model_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import pylab as pl
import numpy as np
import pymc as mc
from pymc import gp
from dismod3.settings import *
from bayesian_models import probabilistic_utils
from bayesian_models.probabilistic_utils import trim, uninformative_prior_gp, NEARLY_ZERO, MAX_AGE
MISSING = -99
PRIOR_SEP_STR = ','
def indices_for_range(age_mesh, age_start, age_end):
return [ ii for ii, a in enumerate(age_mesh) if a >= age_start and a <= age_end ]
def generate_prior_potentials(prior_str, age_mesh, rate, confidence_stoch):
"""
return a list of potentials that model priors on the rate_stoch
prior_str may have entries in the following format:
smooth <tau> [<age_start> <age_end>]
zero <age_start> <age_end>
confidence <mean> <tau>
increasing <age_start> <age_end>
decreasing <age_start> <age_end>
convex_up <age_start> <age_end>
convex_down <age_start> <age_end>
unimodal <age_start> <age_end>
value <mean> <tau> [<age_start> <age_end>]
for example: 'smooth .1, zero 0 5, zero 95 100'
age_mesh[i] indicates what age the value of rate[i] corresponds to
confidence_stoch can be an additional stochastic variable that is used
in the beta-binomial model, but it is not required
"""
def derivative_sign_prior(rate, prior, deriv, sign):
age_start = int(prior[1])
age_end = int(prior[2])
age_indices = indices_for_range(age_mesh, age_start, age_end)
@mc.potential(name='deriv_sign_{%d,%d,%d,%d}^%s' % (deriv, sign, age_start, age_end, rate))
def deriv_sign_rate(f=rate,
age_indices=age_indices,
tau=10000.,
deriv=deriv, sign=sign):
df = np.diff(f[age_indices], deriv)
return -tau * np.dot(df**2, (sign * df < 0))
return [deriv_sign_rate]
priors = []
for line in prior_str.split(PRIOR_SEP_STR):
prior = line.strip().split()
if len(prior) == 0:
continue
if prior[0] == 'smooth':
tau_smooth_rate = float(prior[1])
if len(prior) == 4:
age_start = int(prior[2])
age_end = int(prior[3])
else:
age_start = 0
age_end = MAX_AGE
age_indices = indices_for_range(age_mesh, age_start, age_end)
@mc.potential(name='smooth_{%d,%d}^%s' % (age_start, age_end, rate))
def smooth_rate(f=rate, age_indices=age_indices, tau=tau_smooth_rate):
return mc.normal_like(np.diff(np.log(np.maximum(NEARLY_ZERO, f[age_indices]))), 0.0, tau)
priors += [smooth_rate]
elif prior[0] == 'zero':
tau_zero_rate = 1./(1e-4)**2
age_start = int(prior[1])
age_end = int(prior[2])
age_indices = indices_for_range(age_mesh, age_start, age_end)
@mc.potential(name='zero_{%d,%d}^%s' % (age_start, age_end, rate))
def zero_rate(f=rate, age_indices=age_indices, tau=tau_zero_rate):
return mc.normal_like(f[age_indices], 0.0, tau)
priors += [zero_rate]
elif prior[0] == 'value':
val = float(prior[1])
tau = float(prior[2])
if len(prior) == 4:
age_start = int(prior[3])
age_end = int(prior[4])
else:
age_start = 0
age_end = MAX_AGE
age_indices = indices_for_range(age_mesh, age_start, age_end)
@mc.potential(name='value_{%2f,%2f,%d,%d}^%s' \
% (val, tau, age_start, age_end, rate))
def val_for_rate(f=rate, age_indices=age_indices, val=val, tau=tau):
return mc.normal_like(f[age_indices], val, tau)
priors += [val_for_rate]
elif prior[0] == 'confidence':
# prior only affects beta_binomial_rate model
if not confidence_stoch:
continue
mu = float(prior[1])
tau = float(prior[2])
@mc.potential(name='prior_%s' % confidence_stoch)
def confidence_potential(f=confidence_stoch, mu=mu, tau=tau):
return mc.normal_like(f, mu, tau)
priors += [confidence_potential]
elif prior[0] == 'increasing':
priors += derivative_sign_prior(rate, prior, deriv=1, sign=1)
elif prior[0] == 'decreasing':
priors += derivative_sign_prior(rate, prior, deriv=1, sign=-1)
elif prior[0] == 'convex_down':
priors += derivative_sign_prior(rate, prior, deriv=2, sign=-1)
elif prior[0] == 'convex_up':
priors += derivative_sign_prior(rate, prior, deriv=2, sign=1)
elif prior[0] == 'unimodal':
age_start = int(prior[1])
age_end = int(prior[2])
age_indices = indices_for_range(age_mesh, age_start, age_end)
@mc.potential(name='unimodal_{%d,%d}^%s' % (age_start, age_end, rate))
def unimodal_rate(f=rate, age_indices=age_indices, tau=1000.):
df = np.diff(f[age_indices])
sign_changes = pl.find((df[:-1] > NEARLY_ZERO) & (df[1:] < -NEARLY_ZERO))
sign = np.ones(len(age_indices)-2)
if len(sign_changes) > 0:
change_age = sign_changes[len(sign_changes)/2]
sign[change_age:] = -1.
return -tau*np.dot(np.abs(df[:-1]), (sign * df[:-1] < 0))
priors += [unimodal_rate]
else:
raise KeyError, 'Unrecognized prior: %s' % prior_str
return priors