In [1]:
using Distributions, TimeIt, PyPlot, NPZ, JLD, ProgressMeter
include("zz_structures.jl")
include("mbsampler.jl")

get_clustering_vec_and_score (generic function with 1 method)

In [6]:
# Synthetic sparse logistic-regression data: d features (rows of X), Nobs observations
# (columns of X).
d, Nobs = 10_000, 1_000_000
p = 2e-4  # density argument handed to sprandn
X = sprandn(d, Nobs, p)
X[1,:] = ones(Nobs)  # first feature is 1 for every observation (presumably an intercept)
# This must be spelled ξ_true: the response simulation on the next statement and the
# error/saving cells further down all read the parameter under that name.
ξ_true = rand(Normal(), d)
# One Binomial(1, q) draw per observation with q = 1/(1+exp(-ξ_true'X[:,j]));
# "+ 0." converts the integer draw to a float.
y = [rand(Binomial(1, 1/(1+exp(-ξ_true'X[:,j])))) + 0. for j in 1:Nobs];

In [3]:
# Assemble the model object from a likelihood built on (X, y) and a prior built from a
# zero vector and a constant vector of 100s, both of length d.
# NOTE(review): going by the constructor names these are a sparse logistic likelihood
# and a Gaussian prior; whether 100 is a variance or a standard deviation is not
# visible here — confirm in zz_structures.jl.
my_ll = ll_logistic_sp(X, y)
my_prior = gaussian_prior_nh(zeros(d), fill(100.0, d))
my_model = model(my_ll, my_prior)
# Not read anywhere in the visible cells; presumably consumed by the included code.
is_sparse = true
gc()  # trigger a garbage-collection pass before the expensive cells below

In [4]:
"""
    stochastic_gradient(my_model::model, ξ, batch_size)

Estimate a gradient at `ξ` from a random minibatch.

A minibatch of `batch_size` observation indices is drawn without replacement from
`1:my_model.ll.Nobs`. Entry `k` of the returned vector is the mean of
`partial_derivative_vec(my_model, ξ, k, minibatch)`, for each coordinate `k` of `ξ`.
"""
function stochastic_gradient(my_model::model, ξ, batch_size)
    minibatch = sample(1:my_model.ll.Nobs, batch_size; replace=false)
    return map(1:length(ξ)) do k
        mean(partial_derivative_vec(my_model, ξ, k, minibatch))
    end
end

stochastic_gradient (generic function with 1 method)

In [114]:
"""
    SGD(my_model::model, ξ_0, batch_size, tol, max_iter)

Run minibatch stochastic gradient descent on `my_model`, starting from `ξ_0`.

Iteration `t` proposes `ξ - (1/1.001^t) * g`, where `g` is the value returned by
`stochastic_gradient(my_model, ξ, batch_size)`. The loop ends early as soon as the
norm of a proposed step is strictly below `tol`; otherwise it runs `max_iter` times.

# Arguments
- `my_model::model`: passed through unchanged to `stochastic_gradient`.
- `ξ_0`: starting point; it is copied, never mutated.
- `batch_size`: minibatch size passed to `stochastic_gradient`.
- `tol`: step-norm threshold for early stopping. The comparison is strict, so
  `tol = 0` disables early stopping.
- `max_iter`: maximum number of iterations.

# Returns
The last accepted iterate. When the tolerance test fires, the (tiny) proposed step is
discarded and the iterate preceding it is returned.
"""
function SGD(my_model::model, ξ_0, batch_size, tol, max_iter)
    ξ_current = copy(ξ_0)
    @showprogress for t in 1:max_iter
        step_size = 1/1.001^t  # geometrically decaying step size
        gradient = stochastic_gradient(my_model, ξ_current, batch_size)
        ξ_updated = ξ_current - step_size*gradient
        if norm(ξ_updated-ξ_current) < tol
            @printf("converged in %i iterations \n", t)
            break
        end
        # ξ_updated is a freshly allocated array, so rebinding is enough (no copy needed).
        ξ_current = ξ_updated
    end
    return ξ_current
end

SGD (generic function with 2 methods)

In [119]:
batch_size = 2000
# SGD stops early only when a step norm is strictly below tol, so 0 disables early
# stopping and all max_iter iterations are run.
tol = 0
# This must be spelled ξ_0: that is the name passed to SGD below and to find_root in the
# following cell.
ξ_0 = rand(d)
max_iter = 500
ξ_sgd = SGD(my_model, ξ_0, batch_size, tol, max_iter);

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:04:18[39m


10000-element Array{Float64,1}:
 -0.942492  
  0.551303  
  0.361845  
  0.842921  
  0.992322  
  0.123291  
  0.053986  
  0.855631  
  0.0833264 
  0.363671  
  0.738626  
  0.438232  
  0.973465  
  ⋮         
  0.526352  
  0.913224  
  0.748534  
  0.389626  
  0.384242  
  0.991562  
  0.562702  
  0.664919  
  0.511789  
  0.00461878
  0.638936  
  0.302122  

In [None]:
# Second estimate, from the project's find_root started at the same ξ_0 as SGD.
# NOTE(review): the variable name suggests find_root uses L-BFGS — confirm in the
# included source.
ξ_lbfgs = find_root(my_model, ξ_0);

In [125]:
# Distance (vector norm) from the true parameter to the SGD and find_root estimates.
norm(ξ_true-ξ_sgd), norm(ξ_true-ξ_lbfgs)

(115.95296168610993, 20.631376246419517)

In [126]:
# Error of the SGD estimate relative to the norm of the true parameter.
norm(ξ_true-ξ_sgd)/norm(ξ_true)

1.153672549495507

In [127]:
# Write the data, the true parameter and both estimates to disk, one JLD file per
# object; each triple is (file path, key inside the file, value).
for (path, key, value) in (
        ("X_large.jld", "X", X),
        ("y_large.jld", "y", y),
        ("xi_true.jld", "xi_true", ξ_true),
        ("xi_root_sgd.jld", "xi_root_sgd", ξ_sgd),
        ("xi_root_lbfgs.jld", "xi_root_lbfgs", ξ_lbfgs)
    )
    save(path, key, value)
end