In [1]:
import numpy as np, pandas as pd, pystan as ps, numpy.random as npr, matplotlib.pyplot as plt, h5py
%matplotlib inline 
from time import time
from pylab import plot, show, legend
from scipy.stats import pearsonr, spearmanr, norm, invgamma

#### Compile Stan model:

In [2]:
sm_ng = ps.StanModel(file="ng_logistic.stan") 

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_ec2d62b169a03cfdb12340a1ec25c195 NOW.


#### Load data from file:

In [4]:
data = h5py.File("GZZ_data2.jld", "r")
X = data["X"].value
y = data["y"].value
ξ_true = data["xi_true"].value
d, Nobs = np.shape(X.transpose())

data = dict(N=Nobs, d=d, y=y.astype(int), X=X)

#### Run HMC with Stan:

In [12]:
control = dict(stepsize=1e-3, int_time=1e0, adapt_engaged=False) 

In [13]:
start = time()
fit_ng = sm_ng.sampling(data=data, 
                        thin=1, 
                        control=control, 
                        n_jobs=4, 
                        init="random", 
                        iter=2500, 
                        algorithm="HMC", 
                        warmup=0)
print(round((time()-start)/60, 2), "mins to run")
print(fit_ng);



3.12 mins to run
Inference for Stan model: anon_model_ec2d62b169a03cfdb12340a1ec25c195.
4 chains, each with iter=2500; warmup=0; thin=1; 
post-warmup draws per chain=2500, total post-warmup draws=10000.

               mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
xi[1]          -1.6    0.02   0.35  -2.33  -1.84  -1.58  -1.35  -0.97    316   1.01
xi[2]         -0.18    0.02   0.68   -1.9  -0.39  -0.03   0.09   1.03   1360    1.0
xi[3]         -0.24    0.02   0.66  -2.07  -0.44  -0.04   0.06   0.75    902    1.0
xi[4]          0.08  6.7e-3   0.36  -0.59  -0.07   0.01   0.19   0.97   2840    1.0
xi[5]          0.07  9.1e-3   0.41  -0.71  -0.09 6.6e-3   0.18   1.11   2042    1.0
xi[6]         -0.61    0.05   0.95  -3.08  -0.98  -0.26-9.3e-7   0.46    299   1.02
xi[7]          0.53    0.04   0.71  -0.35 2.5e-3    0.3   0.93    2.3    329   1.01
xi[8]         -0.23    0.01   0.53  -1.62  -0.41  -0.06   0.03   0.59   1344    1.0
xi[9]         -0.12  8.8e-3    0.4  -1.1

### ESS:

In [7]:
a = fit_ng.summary()["summary"]
ess = a[:,-2]
print("Mean effective sample size:", np.round(np.mean(ess),1))

Mean effective sample size: 768.8


### Coverage:

In [8]:
trace = fit_ng.extract()
xi_samples = trace["xi"]

In [9]:
np.shape(xi_samples)

(10000, 100)

In [10]:
cover = np.zeros(d)
ci = np.zeros((d,2))
for i in range(d) :
    ci[i,:] = np.percentile(xi_samples[:,i], q=[5, 95])
    cover[i] = (ci[i,0]<ξ_true[i])&(ξ_true[i]<ci[i,1])

In [11]:
100*np.mean(cover)

93.0