In [1]:
import numpy as np, pandas as pd, pystan as ps, numpy.random as npr, matplotlib.pyplot as plt, h5py
%matplotlib inline 
from time import time
from pylab import plot, show, legend
from scipy.stats import pearsonr, spearmanr, norm, invgamma

#### Compile Stan model:

In [3]:
# Compile the Stan program stored in horseshoe_logistic.stan into a reusable model object.
# Compilation invokes the C++ toolchain (see the log line below), so this cell is slow.
# NOTE(review): the path is absolute and machine-specific.
sm_HS = ps.StanModel(
    file="/home/postdoc/dsen/Desktop/G-ZZ/stan_files/horseshoe_logistic.stan"
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_5218e2900be47b3d461b8ba25a5dd4f5 NOW.


#### Load data from file:

In [5]:
# Read the design matrix, the responses and the true coefficients from the JLD (HDF5) file.
# `dataset[()]` loads the whole dataset into memory as a NumPy array; it replaces the
# `.value` attribute, which was deprecated and then removed in h5py 3.0.
# Because the arrays are in-memory copies, they stay valid after the `with` block closes the file.
with h5py.File("/xtmp/GZZ_data/data/GZZ_data3.jld", "r") as data_file:
    X = data_file["X"][()]
    y = data_file["y"][()]
    ξ_true = data_file["xi_true"][()]

# X has shape (Nobs, d): one row per observation, one column per covariate.
Nobs, d = X.shape

# Inputs passed to the Stan model; y is cast to integers before being handed over.
data = dict(N=Nobs, d=d, y=y.astype(int), X=X)

#### Run HMC with Stan:

In [10]:
# Draw posterior samples from the compiled model with Stan's "HMC" algorithm, timing the
# run with wall-clock time and reporting it in minutes, then print the fit summary table.
#
# NOTE(review): `control` is not assigned in any cell visible in this notebook. It has to be
# defined before this cell runs, otherwise a fresh kernel raises NameError here.
# NOTE(review): the saved output below reports iter=2500 and warmup=0 per chain, which does
# not match iter=1000 in this call; the output looks like it came from an earlier version of
# the cell. Re-run to refresh it.
# NOTE(review): no `seed` is passed, so repeated runs will presumably give different draws.
start = time()
fit_HS = sm_HS.sampling(data=data, 
                        thin=1, 
                        control=control, 
                        n_jobs=4, 
                        init="random", 
                        iter=1000, 
                        algorithm="HMC")
print(round((time()-start)/60, 2), "mins to run")
print(fit_HS);



0.68 mins to run
Inference for Stan model: anon_model_5218e2900be47b3d461b8ba25a5dd4f5.
4 chains, each with iter=2500; warmup=0; thin=1; 
post-warmup draws per chain=2500, total post-warmup draws=10000.

               mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
xi[1]          0.85    0.04    0.4   0.11   0.58   0.83    1.1   1.71    109   1.03
xi[2]          0.66    0.07   1.02  -0.48   0.02   0.37   1.02   3.57    204   1.02
xi[3]         -0.74    0.11   1.56  -5.43   -1.1  -0.23   0.06   1.06    211   1.01
xi[4]          0.34    0.03    0.7  -0.69  -0.04   0.14   0.59   2.16    484   1.01
xi[5]          0.02    0.03   0.85  -1.95  -0.28-1.6e-4    0.3   2.03    652    1.0
xi[6]          0.16    0.03   0.75  -1.25  -0.15   0.03   0.37   2.01    873   1.01
xi[7]        2.9e-3    0.01   0.49  -1.05  -0.19 4.5e-3   0.19   1.06   1695    1.0
xi[8]         -0.19    0.02    0.6  -1.72  -0.37  -0.07   0.07   0.87    877    1.0
xi[9]          0.02    0.02   0.59  -1.2

### Effective sample size (ESS) and $\hat{R}$:

In [11]:
# Summary matrix of the fit: one row per reported quantity, columns as in the printed
# table above, so the second-to-last column is n_eff and the last one is Rhat.
# NOTE(review): the mean below runs over every row of the summary, not only the xi
# entries; confirm that this is what should be reported.
a = fit_HS.summary()["summary"]
ess = a[:, -2]
print("Mean effective sample size:", np.round(ess.mean(), 1))

Mean effective sample size: 849.1


In [15]:
# Smallest and largest Rhat over all rows of the summary matrix (last column is Rhat).
a[:, -1].min(), a[:, -1].max()

(1.000224682481058, 1.1408586590773633)

### Coverage of the 90% credible intervals:

In [11]:
# Pull the posterior draws out of the fit and keep the draws of `xi` for the coverage check.
# The next cell shows the resulting array has one row per draw and one column per coefficient.
trace = fit_HS.extract()
xi_samples = trace["xi"]

In [12]:
# Sanity check: (number of draws, number of coefficients).
xi_samples.shape

(10000, 100)

In [13]:
# For each coefficient, build the central interval between the 5th and 95th percentiles
# of its posterior draws and record whether the true value lies strictly inside it.
# ci[i] holds (lower, upper); cover[i] is 1.0 when ξ_true[i] is covered, 0.0 otherwise.
ci = np.zeros((d, 2))
cover = np.zeros(d)
for i in range(d):
    lower, upper = np.percentile(xi_samples[:, i], q=[5, 95])
    ci[i, :] = lower, upper
    cover[i] = (lower < ξ_true[i]) & (ξ_true[i] < upper)

In [14]:
# Percentage of coefficients whose interval contains the true value.
print(100 * cover.mean())

94.0
