In [1]:
import PyPlot
using PyPlot, Distributions, Optim, TimeIt
include("ZZ_subsampling.jl") 
include("ZZ_clustering.jl")
include("ZZ_dominated_Hessian.jl")

ZZ_dominated_Hessian (generic function with 4 methods)

In [2]:
"""
    logistic_data(d, Nobs, p=0.1)

Simulate a `d × Nobs` feature matrix and a length-`Nobs` vector of binary responses.

The matrix starts as all ones; rows `2` to `d-1` are then filled with uniform `rand`
draws, and finally the whole last column (observation `Nobs`) is overwritten with
three times a `Normal()` draw per coordinate. The responses are independent
`Binomial(1, p)` draws and do not depend on the features.

Returns the tuple `(X, y)`.
"""
function logistic_data(d, Nobs, p=0.1)
    features = ones(d, Nobs)
    # Rows 1 and d keep their initial value of one (apart from the last column below).
    features[2:(d - 1), :] = rand(d - 2, Nobs)
    # Only the final observation receives the scaled normal draws.
    features[:, Nobs] = 3 * rand(Normal(), d)
    labels = rand(Binomial(1, p), Nobs)
    return features, labels
end

logistic_data (generic function with 2 methods)

In [3]:
# Problem size: X has d rows and Nobs columns (see logistic_data).
d, Nobs = 5, 1000
X, y = logistic_data(d, Nobs, 0.1)
# Sparsify X: every entry is multiplied by its own Binomial(1, 0.4) draw, so it is
# kept with probability 0.4 and zeroed otherwise.
X .*= rand(Binomial(1,0.4), d, Nobs)
# Zero vector of length d; it is passed as the fourth argument to the ZZ_* calls below
# (presumably the starting point of the sampler — confirm in the included files).
β_0 = zeros(d)
# Number of responses equal to 1 (y holds Binomial(1, p) draws, i.e. zeros and ones).
sum(y)

106

In [7]:
# Settings shared by the sampler calls below. Their precise meaning is defined in the
# included ZZ_*.jl files, so the notes here only record how this notebook uses them.
mb_size = 10           # ZZ_clustering receives mb_size, ZZ_logistic receives 2mb_size
max_attempts = 10^5    # third argument of ZZ_clustering and ZZ_logistic
σ = 2                  # also passed to find_root and compute_configT (presumably a prior scale — confirm)
A = eye(d)             # d×d identity matrix
# find_root is defined in one of the included files; its result is forwarded to
# ZZ_clustering and ZZ_logistic as `root`.
root = find_root(X, y, σ);

In [8]:
# Small constant added to every weight before normalisation, so no entry is exactly zero.
ϵ = 1e-5

# weights_cv[i, n] = |X[i, n]| * norm(X[:, n]) + ϵ, then each row is rescaled to sum to 1.
weights_cv = zeros(d, Nobs)
for n in 1:Nobs
    x_n = X[:, n]
    # norm(x_n) is a scalar here, evaluated once per column and broadcast over the entries.
    weights_cv[:, n] = abs.(x_n) .* norm(x_n) .+ ϵ
end
weights_cv ./= sum(weights_cv, 2);

# weights[i, n] = |X[i, n]| + ϵ, again rescaled so that every row sums to 1.
weights = abs.(X) .+ ϵ
weights ./= sum(weights, 2);

In [9]:
# `h` is the third argument given to extract_samples in the cells below.
h = 1
# `replace` is the last argument given to ZZ_logistic (presumably sampling with or
# without replacement — confirm in ZZ_subsampling.jl).
# NOTE(review): this global has the same name as Base's `replace` function.
replace = false 

false

#### ZZ_clustering:

In [10]:
# Three ZZ_clustering runs timed together: "kmeans", and "sorted" with the tenth
# argument false ("unweighted" in the labels below) and true ("weighted", which also
# passes a trailing 1e-1).
start = time()
s_kmeans, b_kmeans = ZZ_clustering(
    X, y, max_attempts, β_0, mb_size, root, σ, A, "kmeans", false)
s_sorted_uw, b_sorted_uw = ZZ_clustering(
    X, y, max_attempts, β_0, mb_size, root, σ, A, "sorted", false)
s_sorted_w, b_sorted_w = ZZ_clustering(
    X, y, max_attempts, β_0, mb_size, root, σ, A, "sorted", true, 1e-1)
println(round((time() - start) / 60, 2), " minutes to run ")

samples_kmeans = extract_samples(s_kmeans, b_kmeans, h)
samples_sorted_uw = extract_samples(s_sorted_uw, b_sorted_uw, h)
samples_sorted_w = extract_samples(s_sorted_w, b_sorted_w, h)

# One report line per run: compute_configT evaluated for each coordinate 1:d, rounded
# to two decimals. The last line is printed without a trailing newline.
println("Configuration temperatures: ")
for (label, samples, tail) in (("k-means: ", samples_kmeans, "\n"),
                               ("sorted + unweighted: ", samples_sorted_uw, "\n"),
                               ("sorted + weighted: ", samples_sorted_w, ""))
    print(label, round.([compute_configT(samples, i, X, y, Nobs, σ) for i in 1:d], 2), tail)
end

0.4% of switches accepted 
0.069% of switches accepted 
0.89% of switches accepted 
0.16 minutes to run 
Configuration temperatures: 
k-means: [-6.44, -8.82, -4.09, -6.22, -0.71]
sorted + unweighted: [-3.37, -1.94, -0.69, -2.5, 1.42]
sorted + weighted: [0.15, 0.17, 1.15, -0.15, 1.05]

In [11]:
# Same "sorted" configuration with the tenth argument true, but with the final argument
# set to 1e-4; the returned values are discarded, so only what the call prints is kept.
ZZ_clustering(X, y, max_attempts, β_0, mb_size, root, σ, A, "sorted", true, 1e-4);

0.88% of switches accepted 


In [12]:
# Same "sorted" configuration with the tenth argument true, but with the final argument
# set to 1e-8; the returned values are discarded, so only what the call prints is kept.
ZZ_clustering(X, y, max_attempts, β_0, mb_size, root, σ, A, "sorted", true, 1e-8);

0.96% of switches accepted 


#### ZZ_subsampling:

In [13]:
# Two ZZ_logistic runs with the ninth argument true (the "CV" runs; presumably control
# variates — confirm in ZZ_subsampling.jl), timed together. The first passes `nothing`
# as weights, the second passes weights_cv. Both use a mini-batch argument of 2mb_size.
start = time()
s_cv_unweighted, b_cv_unweighted = ZZ_logistic(
    X, y, max_attempts, β_0, 2 * mb_size, root, σ, A, true, nothing, replace)
s_cv_weighted, b_cv_weighted = ZZ_logistic(
    X, y, max_attempts, β_0, 2 * mb_size, root, σ, A, true, weights_cv, replace)
println(round((time() - start) / 60, 2), " minutes to run ")

samples_cv_unweighted = extract_samples(s_cv_unweighted, b_cv_unweighted, h)
samples_cv_weighted = extract_samples(s_cv_weighted, b_cv_weighted, h);

# One report line per run: compute_configT evaluated for each coordinate 1:d, rounded
# to two decimals. The last line is printed without a trailing newline.
println("Configuration temperatures: ")
for (label, samples, tail) in (("CV unweighted: ", samples_cv_unweighted, "\n"),
                               ("CV weighted  : ", samples_cv_weighted, ""))
    print(label, round.([compute_configT(samples, i, X, y, Nobs, σ) for i in 1:d], 2), tail)
end

0.025% of switches accepted 
3.3% of switches accepted 
0.09 minutes to run 
Configuration temperatures: 
CV unweighted: [-1.28, -0.62, -0.44, -1.92, -0.62]
CV weighted  : [1.15, 0.86, 1.18, 1.27, 1.29]

In [14]:
start = time()
s_iid, b_iid = ZZ_logistic(X, y, max_attempts, β_0, 2mb_size, root, σ, A, false, nothing, replace)
s_unweighted, b_unweighted = ZZ_logistic(X, y, max_attempts, β_0, 2mb_size, root, σ, A, false, ones(d,Nobs), replace)
s_weighted, b_weighted = ZZ_logistic(X, y, max_attempts, β_0, 2mb_size, root, σ, A, false, weights, replace)
print( round((time()-start)/60, 2), " minutes to run \n")
samples_iid = extract_samples(s_iid, b_iid, h)
samples_unweighted = extract_samples(s_unweighted, b_unweighted, h)
samples_weighted = extract_samples(s_weighted, b_weighted, h)
print("Configuration temperatures: \n")
print("iid       : ", round.([compute_configT(samples_iid, i, X, y, Nobs, σ) for i in 1:d], 2), "\n")
print("weighted  : ", round.([compute_configT(samples_weighted, i, X, y, Nobs, σ) for i in 1:d], 2), "\n")
print("unweighted: ", round.([compute_configT(samples_unweighted, i, X, y, Nobs, σ) for i in 1:d], 2)) 

0.39% of switches accepted 
0.41% of switches accepted 
2.8% of switches accepted 
0.11 minutes to run 
Configuration temperatures: 
iid       : [-10.03, -0.98, -2.96, 0.06, -0.46]
weighted  : [0.09, 2.34, 0.75, 0.92, 0.35]
unweighted: [3.33, -2.22, -5.48, -3.04, 3.38]

#### ZZ_dominated_Hessian:

In [None]:
# NOTE(review): this rebinds the global `h` (previously 1), so any cell re-run after
# this one will pass 0.1 to extract_samples.
h = 0.1

# Single timed ZZ_dominated_Hessian run; the third argument is the literal 10^5.
start = time()
s_dH, b_dH = ZZ_dominated_Hessian(X, y, 10^5, β_0, σ, A)
println(round((time() - start) / 60, 2), " minutes to run ")

samples_dH = extract_samples(s_dH, b_dH, h)

# compute_configT evaluated for each coordinate 1:d, rounded to two decimals.
println("Configuration temperatures: ")
println(round.([compute_configT(samples_dH, i, X, y, Nobs, σ) for i in 1:d], 2))