In [1]:
import numpy as np, pandas as pd, pystan as ps, numpy.random as npr, matplotlib.pyplot as plt, h5py
%matplotlib inline 
from time import time
from pylab import plot, show, legend
from scipy.stats import pearsonr, spearmanr, norm, invgamma

#### Compile Stan model:

In [2]:
# Compile the Stan program stored in horseshoe_logistic.stan into a model
# object that can be sampled from. This triggers a C++ build (see the log
# line below), so the cell takes a while on a fresh kernel.
sm_HS = ps.StanModel(
    file="horseshoe_logistic.stan",
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_5218e2900be47b3d461b8ba25a5dd4f5 NOW.


#### Load data from file:

In [4]:
# Read the design matrix, the responses and the ground-truth coefficients from
# the HDF5-backed .jld file.
# `dataset[()]` loads the full dataset into memory as a NumPy array; it is the
# replacement for the `.value` accessor, which was deprecated and then removed
# in h5py 3.0. The context manager closes the file handle once the arrays have
# been copied out, instead of leaving it open for the life of the kernel.
with h5py.File("GZZ_data2.jld", "r") as jld_file:
    X = jld_file["X"][()]
    y = jld_file["y"][()]
    ξ_true = jld_file["xi_true"][()]

# X is stored as (observations, predictors).
Nobs, d = X.shape

# Inputs handed to the Stan program; y is cast to integers before being passed.
data = dict(N=Nobs, d=d, y=y.astype(int), X=X)

#### Run HMC with Stan:

In [5]:
# Sampler settings forwarded to Stan via the `control` argument: a hand-set
# step size and integration time, with adaptation switched off.
control = {
    "stepsize": 1e-2,
    "int_time": 1e0,
    "adapt_engaged": False,
}

In [7]:
# Fixed seed so that the chains (including their random initial values) can be
# reproduced from run to run; without it every execution gives different draws
# and the numbers reported below cannot be regenerated.
seed_HS = 12345

# Run 4 parallel chains with algorithm="HMC" using the settings in `control`.
# warmup=0 means all 2500 iterations of each chain are kept as draws.
start = time()
fit_HS = sm_HS.sampling(
    data=data,
    thin=1,
    control=control,
    n_jobs=4,
    init="random",
    iter=2500,
    algorithm="HMC",
    warmup=0,
    seed=seed_HS,
)
# Wall-clock duration of the sampling call, in minutes.
print(round((time()-start)/60, 2), "mins to run")
print(fit_HS)



0.38 mins to run
Inference for Stan model: anon_model_5218e2900be47b3d461b8ba25a5dd4f5.
4 chains, each with iter=2500; warmup=0; thin=1; 
post-warmup draws per chain=2500, total post-warmup draws=10000.

              mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
xi[1]        -1.72    0.09   0.39  -2.61  -1.95  -1.67  -1.46  -1.03     20   1.15
xi[2]        -0.13    0.02   0.62  -1.85  -0.26  -0.04   0.11    1.0    667   1.01
xi[3]        -0.16    0.02   0.62  -1.82  -0.27  -0.02   0.08    0.8    757   1.01
xi[4]         0.02  7.8e-3   0.36  -0.69  -0.16 3.8e-3   0.17   0.84   2100    1.0
xi[5]         0.08    0.02    0.4   -0.7  -0.08   0.02   0.22   0.94    340    1.0
xi[6]        -0.46    0.07    0.8  -2.66   -0.7  -0.16 7.0e-3    0.4    120   1.04
xi[7]         0.49    0.12   0.73  -0.37 5.7e-4   0.25   0.79   2.44     36   1.09
xi[8]        -0.21    0.02    0.5  -1.63  -0.36  -0.06   0.05   0.54    439   1.02
xi[9]         -0.1    0.02   0.37  -1.07  -0.21  

### ESS:

In [20]:
# Take the n_eff column from the fit's summary table and report its average.
# In the printed summary the last two columns are n_eff and Rhat, hence the
# index -2.
# NOTE(review): the table has one row per reported quantity, so this average
# presumably also includes rows other than xi (e.g. lp__) -- confirm that is
# the intended definition of "mean ESS".
a = fit_HS.summary()["summary"]
ess = a[:, -2]
print("Mean effective sample size:", np.round(ess.mean(), 1))

Mean effective sample size: 768.2


### Coverage:

In [11]:
# Pull the posterior draws out of the fit; `trace` is indexed by parameter name.
trace = fit_HS.extract()
# Draws of the coefficient vector xi: one row per draw, one column per
# coefficient (the shape check in the following cell reports (10000, 100)).
xi_samples = trace["xi"]

In [12]:
# Sanity check on the draws array: (number of draws, number of coefficients).
xi_samples.shape

(10000, 100)

In [13]:
# Empirical coverage of the central 90% posterior intervals: for each of the d
# coefficients, check whether the true value lies strictly between the 5th and
# 95th percentiles of its posterior draws.
#
# All d intervals come from a single vectorised percentile call along the draw
# axis rather than a Python-level loop over columns.
ci = np.percentile(xi_samples[:, :d], q=[5, 95], axis=0).T  # shape (d, 2): [lower, upper]

# Flatten the reference vector so that both (d,) and (d, 1) layouts of the
# stored truth are handled the same way.
xi_ref = np.ravel(ξ_true)[:d]

# 1.0 where the interval contains the truth, 0.0 otherwise (float array, so
# its mean is the coverage fraction).
cover = ((ci[:, 0] < xi_ref) & (xi_ref < ci[:, 1])).astype(float)

In [14]:
# Percentage of coefficients whose interval contains the true value.
print(100 * cover.mean())

94.0
