#### Sanity checks using non-sparse structures. Everything here has been checked and works [except grouped sub-sampling]. 

In [1]:
using Distributions, TimeIt, PyPlot, NPZ
include("zz_structures.jl")
include("mbsampler.jl")

get_clustering (generic function with 1 method)

In [2]:
# Problem size: d coefficients, Nobs observations.
d = 5
Nobs = 100
# Success probability of the Bernoulli mask applied to the design matrix.
p = 0.2

# Gaussian design matrix masked entry-wise by Bernoulli(p) draws; row 1 is then
# set to all ones (presumably an intercept term — confirm).
X = rand(Normal(), d, Nobs) .* rand(Binomial(1, p), d, Nobs)
X[1, :] = ones(Nobs)

# Coefficients used to simulate the responses.
ξ_true = rand(Normal(), d)

# One Bernoulli response per observation, with success probability
# 1 / (1 + exp(-ξ_true' * X[:, j])), stored as Float64.
y = map(1:Nobs) do j
    success_prob = 1 / (1 + exp(-(ξ_true' * X[:, j])))
    float(rand(Binomial(1, success_prob), 1)[1])
end;

# Likelihood, prior and model objects come from the included project files; the
# prior is built from a zero vector and a vector of ones.
my_ll = ll_logistic(X, y);
my_prior = gaussian_prior_nh(zeros(d), ones(d))
my_model = model(my_ll, my_prior)

# Root search started from a uniform random point in [0, 1)^d.
root = find_root(my_model, rand(d));

In [3]:
# Additive offset that keeps every weight strictly positive.
ϵ = 0.1

# Entry (i, n) is |X[i, n]| * norm(X[:, n]) + ϵ, filled one column at a time.
weights_cv = zeros(d, Nobs)
for n in 1:Nobs
    weights_cv[:, n] = abs.(X[:, n]) .* norm(X[:, n]) .+ ϵ
end
# Normalise so that each row sums to one.
weights_cv ./= sum(weights_cv, 2);

# Entry (i, n) is |X[i, n]| + ϵ, normalised row-wise in the same way.
weights = abs.(X) .+ ϵ
weights ./= sum(weights, 2)

# Mini-batch size (reused further down as the number of clusters).
mb_size = 10;

In [4]:
# Stratified sub-sampling without CV (and weights)
#
# Builds one clustered mini-batch sampler per coordinate and collects them into
# `gs_list`.
# NOTE(review): the recorded output of this cell is
# `UndefVarError: get_clustering_vec not defined`, while the include of
# `mbsampler.jl` reported only `get_clustering`. Confirm which function (and
# which signature) is intended before relying on this cell.

# One entry per coordinate i, computed by `partial_derivative_vec` at `root`
# over observations 1:Nobs (presumably the per-observation partial derivatives
# of the log-likelihood — confirm against its definition).
ll_pd_root_list = [partial_derivative_vec(my_model.ll, root, i, 1:Nobs) for i in 1:d]

# Uninitialised container with one clustered sampler slot per coordinate.
cmbsamplers = Array{cmbsampler}(d)
# The number of clusters is tied to the mini-batch size.
N_cluster = mb_size
# Clustering mode string forwarded to the clustering routine; its meaning is
# defined there — TODO confirm.
mode = "wc"

for dim in 1:d
    # One sampler slot per cluster of this coordinate.
    csamplers = Array{mbsampler}(N_cluster)
    clusters = get_clustering_vec(ll_pd_root_list[dim], N_cluster, mode)
    #print(clusters)
    # Second argument handed to every per-cluster sampler: all ones here.
    scp = ones(N_cluster)
    for (ci, c) in enumerate(clusters)
        #csamplers[ci] = umbsampler(size(c)[1], scp[ci])
        # Weighted sampler over the members of cluster `c`, using this
        # coordinate's row of `weights` restricted to those members.
        csamplers[ci] = wumbsampler(size(c)[1], scp[ci], weights[dim,c])
    end
    #cmbsamplers[dim] = cmbsampler(csamplers, clusters, ones(Nobs)/Nobs)
    # Combine the per-cluster samplers, passing the full weight row for `dim`.
    cmbsamplers[dim] = cmbsampler(csamplers, clusters, weights[dim,:])
end
# Wrap the d per-coordinate samplers into the list object passed to `ZZ_sample`.
gs_list = mbsampler_list(d,cmbsamplers);

LoadError: [91mUndefVarError: get_clustering_vec not defined[39m

In [5]:
# d-by-d identity matrix passed to `projopf`, followed by the run options and
# the scheduler that bundles both.
# NOTE(review): the meaning of the literals 100 and 10^5 is defined by
# `projopf` / `maxa_opt` in the included files — confirm there.
A = eye(d, d)
opf = projopf(A, 100)
opt = maxa_opt(100_000)
outp = outputscheduler(opf, opt);

In [6]:
# Run the sampler and report the wall-clock duration in minutes (2 decimals).
start = time()
ZZ_sample(my_model, outp, gs_list)
println(round((time() - start) / 60, 2), " minutes to run ")

0.14 minutes to run 


In [7]:
# Divisor used to derive the third argument of `extract_samples` below.
n_samples = 10^4
# Time `extract_samples` on the stored skeletons; the third argument is the last
# entry of `bt_skeleton` divided by `n_samples`.
# NOTE(review): whether the assignment inside `@timeit` creates or updates the
# global `samples` read by later cells depends on TimeIt's macro expansion —
# confirm, otherwise later cells may be reading a stale `samples`.
@timeit samples = extract_samples(outp.opf.xi_skeleton, outp.opf.bt_skeleton, outp.opf.bt_skeleton[end]/n_samples);

100 loops, best of 3: 1.58 ms per loop


In [8]:
# Evaluate `compute_configT` on the extracted samples for k = 1, …, 5.
map(k -> compute_configT(my_model, samples, k), 1:5)

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:01[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m
[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


5-element Array{Float64,1}:
 1.33203 
 0.933271
 1.15586 
 0.936368
 1.02764 

In [22]:
# New options and scheduler for another run; `opf` is the object created in an
# earlier cell.
# NOTE(review): `opf` is reused rather than rebuilt — confirm that it carries
# no state over from the previous run.
opt = maxa_opt(100_000)
outp = outputscheduler(opf, opt);

In [23]:
# Run the sampler again and report the wall-clock duration in minutes (2 decimals).
start = time()
ZZ_sample(my_model, outp, gs_list)
println(round((time() - start) / 60, 2), " minutes to run ")

0.13 minutes to run 


In [24]:
# Display the `tcounter` field of `outp.opf` after the run above.
outp.opf.tcounter

11781

In [26]:
# Scratch check: length-d Float64 vector with every entry equal to 1.0.
fill(1.0, d)

5-element Array{Float64,1}:
 1.0
 1.0
 1.0
 1.0
 1.0