#### Testing out Gibbs zig-zag code:

In [1]:
using Distributions, TimeIt, ProgressMeter, PyPlot

In [2]:
include("zz_structures.jl")
include("mbsampler.jl")

get_clustering (generic function with 1 method)

In [3]:
# Simulate a sparse logistic-regression problem.
# X is d × Nobs: rows are covariates, columns are observations.
d, Nobs = 15, 100
p = 1e-1                       # density of non-zero entries in the design matrix
X = sprandn(d, Nobs, p)        # use `p` rather than repeating the literal
X[1,:] = ones(Nobs)            # first row is all ones (intercept term)

# True coefficients: standard-normal draws, each masked to zero with probability 0.5.
ξ_true = rand(Normal(),d).* rand(Binomial(1,0.5), d)
ξ_true[1] = rand(Normal())     # coefficient of the all-ones row is always redrawn from N(0,1)

# Binary responses with success probability 1/(1+exp(-ξ_true'x_j));
# `+ 0.` turns the integer draw into a Float64.
y = [rand(Binomial(1, 1/(1+exp(-ξ_true'X[:,j]))), 1)[1] + 0. for j in 1:Nobs];
mb_size = 10                   # passed to the sub-samplers built in a later cell
σ02 = 1                        # only used by the commented-out priors below

# Alternative priors (disabled):
# prior = HS_prior(d, σ02)
# prior = GDP_prior(d, σ02)
# NOTE(review): arguments are presumably the prior mean and per-coordinate scale —
# confirm against the definition of gaussian_prior_nh.
prior = gaussian_prior_nh(zeros(d), 10*rand(d))

gaussian_prior_nh([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [4.4096, 6.4294, 7.02946, 3.59418, 3.13586, 2.91606, 4.39017, 0.887921, 0.106138, 6.15882, 9.02206, 6.28709, 2.38809, 7.65176, 5.22121])

In [4]:
# Wrap the data in the sparse logistic log-likelihood, combine it with the prior
# into a model, and run find_root from a uniformly random starting vector.
my_ll = ll_logistic_sp(X, y)
my_model = model(my_ll, prior)
ξ_start = rand(d)
root = find_root(my_model, ξ_start);

In [36]:
# Stratified sub-sampling with CV and with weights

# Euclidean norm of every column (observation) of X.
norm_Xj = map(j -> norm(X[:,j]), 1:Nobs)

# weights_cv[i, j] ∝ |X[i, j]| * norm_Xj[j] over the stored entries of row i;
# each row is normalised so that its stored weights sum to one.
weights_cv = spzeros(d, Nobs)
for row in 1:d
    cols = X[row,:].nzind
    unnormalised = abs.(X[row,cols]) .* norm_Xj[cols]
    weights_cv[row,cols] = unnormalised / sum(unnormalised)
end

# One sparse vector per coordinate, evaluated at `root` over all observations.
ll_pd_root_list = [partial_derivative_vec(my_model.ll, root, i, 1:Nobs) for i in 1:d]

# Coordinate 1 (the all-ones row of X) gets a plain umbsampler;
# every other coordinate gets a clustered, weighted sampler.
cmbsamplers = Array{mbsampler}(d)
cmbsamplers[1] = umbsampler(Nobs, mb_size)
N_cluster = mb_size

for dim in 2:d
    pd_root = ll_pd_root_list[dim]
    # Never ask for more clusters than there are stored entries.
    n_strata = min(N_cluster, length(pd_root.nzval))

    # Cluster on the stored values, then map the positions within nzval
    # back to observation indices.
    strata = get_clustering(n_strata, pd_root.nzval)
    for k in 1:n_strata
        strata[k] = pd_root.nzind[strata[k]]
    end

    # One weighted sampler per cluster; the second argument is fixed at 1.0 for every cluster.
    stratum_samplers = Array{mbsampler}(n_strata)
    for (k, members) in enumerate(strata)
        stratum_samplers[k] = wumbsampler(length(members), 1.0, weights_cv[dim,members])
    end

    cmbsamplers[dim] = spcmbsampler(stratum_samplers, strata, weights_cv[dim,:])
end
gs_list = cvmbsampler_list(my_model, cmbsamplers, root, true);

In [37]:
# Output setup: a projopf built from the d × d identity matrix, combined with a
# maxa_opt(10^6) option object into an output scheduler.
A = eye(d)
n_hyper = hyperparam_size(prior)
opf = projopf(A, 100, n_hyper)
# Random per-coordinate values in [0, 10); printed as "Speed" in a later cell.
opf.alpha = 10 .* rand(d)
opt = maxa_opt(1_000_000)
outp = outputscheduler(opf, opt);

In [38]:
# Build the linear bound and then update it. Each call receives its own
# freshly constructed zz_state(opf), exactly as before.
bound_init_state = zz_state(opf)
bb = linear_bound(my_model.ll, my_model.pr, gs_list, bound_init_state)
bound_update_state = zz_state(opf)
update_bound(bb, my_ll, prior, gs_list, bound_update_state);

In [39]:
# Show the per-coordinate values stored in opf.alpha.
speeds = opf.alpha
print("Speed = ", speeds)

Speed = [3.45019, 5.28851, 6.22982, 4.50396, 8.03383, 9.33317, 3.27547, 9.35093, 0.443374, 0.181778, 8.91771, 9.28604, 0.525634, 2.48689, 9.38953]

In [40]:
# Two samplers: a zz_sampler built from gs_list and the bound bb, and a
# block_gibbs_sampler. Both are collected in a vector with element type msampler.
my_zz_sampler = zz_sampler(0, gs_list, bb)
hyper_sampler = block_gibbs_sampler(1.)
blocksampler = msampler[my_zz_sampler, hyper_sampler];

In [41]:
# Run ZZ_block_sample on the model, output scheduler and sampler array,
# then report the elapsed wall-clock time in minutes (two decimals).
start = time()
# Arguments are passed plainly; call-site `::` type assertions are unnecessary here.
ZZ_block_sample(my_model, outp, blocksampler)
print(round((time()-start)/60, 2), " mins")

1.87 mins

In [42]:
# Extract n_samples-based discretisations of the ξ and hyper-parameter skeletons.
# Both calls share the same time skeleton and the same third argument
# (final skeleton time divided by n_samples — presumably the grid spacing; confirm).
n_samples = 10^4
skeleton_times = outp.opf.bt_skeleton
sample_spacing = skeleton_times[end]/n_samples
xi_samples = extract_samples(outp.opf.xi_skeleton, skeleton_times, sample_spacing, "linear");
hyper_samples = extract_samples(outp.opf.hyper_skeleton, skeleton_times, sample_spacing, "constant");

In [43]:
# Evaluate compute_configT for every coordinate k = 1..d on the extracted samples,
# with the last column of each sample matrix dropped.
# The slices are taken once instead of being re-copied for each k.
# NOTE(review): assumes compute_configT does not mutate its sample arguments — confirm.
xi_trimmed = xi_samples[:,1:end-1]
hyper_trimmed = hyper_samples[:,1:end-1]
[compute_configT(my_model, xi_trimmed, hyper_trimmed, k) for k in 1:d]

15-element Array{Float64,1}:
 0.99886 
 0.99712 
 0.982481
 1.02262 
 0.96099 
 1.00408 
 1.06045 
 0.960987
 0.944222
 0.747004
 1.02758 
 1.01906 
 0.95823 
 1.02802 
 0.977183