/
code_7.14.py
81 lines (62 loc) · 2.08 KB
/
code_7.14.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# From: Bayesian Models for Astrophysical Data, Cambridge Univ. Press
# (c) 2017, Joseph M. Hilbe, Rafael S. de Souza and Emille E. O. Ishida
#
# you are kindly asked to include the complete citation if you used this
# material in a publication
# Code 7.14 - Bayesian lognormal-logit hurdle model in Python using Stan
# 1 response (y) and 2 explanatory variables (x1, x2)
#
import numpy as np
import pystan
import statsmodels.api as sm
from scipy.stats import uniform, bernoulli
# Data
np.random.seed(33559) # set seed to replicate example
nobs= 2000 # number of obs in model
x1 = uniform.rvs(loc=0, scale=2.5, size=nobs)
xc = 0.6 + 1.25 * x1 # linear predictor, xb
y = np.random.lognormal(sigma=0.4, mean=np.exp(xc))
xb = -3.0 + 4.5 * x1 # construct filter
pi = 1.0/(1.0 + np.exp(-xb))
bern = [bernoulli.rvs(1-pi[i]) for i in range(nobs)]
ly = [y[i]*bern[i] for i in range(nobs)] # Add structural zeros
X = np.transpose(x1)
X = sm.add_constant(X)
mydata = {} # build data dictionary
mydata['Y'] = ly # response variable
mydata['N'] = nobs # sample size
mydata['Xb'] = X # predictors
mydata['Xc'] = X
mydata['Kb'] = X.shape[1] # number of coefficients
mydata['Kc'] = X.shape[1]
# Fit
stan_code = """
data{
int<lower=0> N;
int<lower=0> Kb;
int<lower=0> Kc;
matrix[N, Kb] Xb;
matrix[N, Kc] Xc;
real<lower=0> Y[N];
}
parameters{
vector[Kc] beta;
vector[Kb] gamma;
real<lower=0> sigmaLN;
}
model{
vector[N] mu;
vector[N] Pi;
mu = exp(Xc * beta);
for (i in 1:N) Pi[i] = inv_logit(Xb[i] * gamma);
for (i in 1:N) {
(Y[i] == 0) ~ bernoulli(Pi[i]);
if (Y[i] > 0) Y[i] ~ lognormal(mu[i], sigmaLN);
}
}
"""
# Run mcmc
fit = pystan.stan(model_code=stan_code, data=mydata, iter=7000, chains=3,
warmup=4000, n_jobs=3)
# Output
print(fit)