In [1]:
using Distributions, StatsBase, ProgressMeter
include("general_functions.jl")
pos(x) = max(0,x)

pos (generic function with 1 method)

In [2]:
""" 
Unbiased estimator of derivative of loglikelihood based on minibatch.
The derivative is a sum.
Weights have to sum to 1.
"""

#"No weights"

function derivative_ll(X::Array{Float64}, 
                       y::Array{Int64}, 
                       ξ::Array{Float64}, 
                       dim::Int64, 
                       mb::Array{Int64})  
    ξ = reshape(ξ, 1, length(ξ))
    mean(X[dim,mb].*(reshape(exp.(ξ*X[:,mb])./(1+exp.(ξ*X[:,mb])), length(mb), ) - y[mb]))*size(X,2)
end

function derivative_ll(X::Array{Float64}, 
                       y::Array{Int64}, 
                       ξ::Array{Float64}, 
                       dim::Int64, 
                       mb::Array{Int64},
                       weights::Array{Float64})  
    for dim in 1:length(ξ) 
        @assert abs(1 - sum(weights[dim,:])) < 1e-7
    end
    ξ = reshape(ξ, 1, length(ξ))
    w = weights[dim,:]
    mean(X[dim,mb].*(reshape(exp.(ξ*X[:,mb])./(1+exp.(ξ*X[:,mb])), length(mb), ) - y[mb])./w[mb])
end

function derivative_gaussian_prior(ξ::Array{Float64}, 
                                   dim::Int64, 
                                   σ::Float64) 
    return ξ[dim]/σ^2
end

derivative_gaussian_prior (generic function with 1 method)

In [3]:
function compute_configT(samples, k, X, y, Nobs, σ)
    d, Nobs = size(X) 
    n_samples = size(samples,2)
    configT = 0.0
    for j in 1:n_samples
        configT += samples[k,j]*(derivative_ll(X, y, samples[:,j], dim, Array(1:Nobs)) 
                                 + derivative_gaussian_prior(samples[:,j], dim, σ)) 
    end
    return configT/n_samples
end

compute_configT (generic function with 1 method)

In [5]:
"""Rate without weights:"""

function rate(X::Array{Float64}, 
              y::Array{Int64}, 
              ξ::Array{Float64},
              dim::Int64, 
              mb_size::Int64, 
              θ::Array{Float64}, 
              root::Array{Float64}, 
              gradient_root::Array{Float64},
              replace::Bool,
              cv::Bool,
              weights::Void)
    d, Nobs = size(X)
    mb = sample(1:Nobs, mb_size; replace=replace)     
    if cv 
        rate_ξ = derivative_ll(X, y, ξ, dim, mb)
        rate_root = derivative_ll(X, y, root, dim, mb) 
        rate_ = pos(θ[dim]*(gradient_root[dim] + rate_ξ - rate_root))
    else
        rate_ = pos(θ[dim]*(derivative_ll(X, y, ξ, dim, mb) 
                           + derivative_gaussian_prior(ξ, dim, σ)))
    end
    return rate_
end


function rate(X::Array{Float64}, 
              y::Array{Int64}, 
              ξ::Array{Float64},
              dim::Int64, 
              mb_size::Int64, 
              θ::Array{Float64}, 
              root::Array{Float64}, 
              gradient_root::Array{Float64},
              replace::Bool,
              cv::Bool,
              weights::Array{Float64})
    d, Nobs = size(X)
    for dim in 1:d 
        @assert abs(1 - sum(weights[dim,:])) < 1e-7
    end
    mb = wsample(1:Nobs, Weights(weights[dim,:]), mb_size; replace=replace)           
    if cv 
        # fix this: 
        rate_ξ = derivative_ll(X, y, ξ, dim, mb, weights)
        rate_root = derivative_ll(X, y, root, dim, mb, weights) 
        rate_ = pos(θ[dim]*(gradient_root[dim] + rate_ξ - rate_root))
    else 
        rate_ = (pos(θ[dim]*derivative_ll(X, y, ξ, dim, mb, weights)) 
                     + pos(θ[dim]*derivative_gaussian_prior(ξ, dim, σ)))
    end
    return rate_
end

rate (generic function with 2 methods)

In [13]:
print(rate(X,y,ξ,dim,mb_size,θ,root,gradient_root,true,true,weights), "\n")
print(rate(X,y,ξ,dim,mb_size,θ,root,gradient_root,true,false,weights), "\n")
print(rate(X,y,ξ,dim,mb_size,θ,root,gradient_root,true,true,nothing), "\n")
print(rate(X,y,ξ,dim,mb_size,θ,root,gradient_root,true,false,nothing), "\n")

13.211711632782393
9.691227520745723
0.0
0.0


In [14]:
""" Bounds """

function bounds(X::Array{Float64,2},
                cv::Bool,
                weights::Void) 
    d, Nobs = size(X) 
    if cv 
        lipschitz_constants = zeros(d,N)
        for n in 1:Nobs
            lipschitz_constants[:,n] = 1/4*[abs.(X[i,n])*norm(X[:,n]) for i in 1:d]
        end
        return Nobs*(maximum(lipschitz_constants, 2))
    else
        return Nobs*(maximum(abs.(X), 2))
    end
end

function bounds(X::Array{Float64},
                cv::Bool,
                weights::Array{Float64})
#     @assert sum(weights) == 1
    d, Nobs = size(X) 
    if cv 
        lipschitz_constants = zeros(d,N)
        for n in 1:Nobs
            lipschitz_constants[:,n] = 1/4*[abs.(X[i,n])*norm(X[:,n]) for i in 1:d]
        end
        return maximum(lipschitz_constants./weights, 2)
    else
        return maximum(abs.(X./weights),2)
    end
end

bounds (generic function with 2 methods)

In [18]:
print( round.(bounds(X,false,weights)), "\n")
print( round.(bounds(X,false,nothing)), "\n")
print( round.(bounds(X,true,weights)), "\n")
print( round.(bounds(X,true,nothing)), "\n")

[100.0; 88.0; 78.0; 80.0; 83.0; 68.0; 78.0; 83.0; 75.0; 74.0]
[100.0; 229.0; 268.0; 227.0; 304.0; 309.0; 221.0; 251.0; 234.0; 273.0]
[123.0; 109.0; 97.0; 99.0; 103.0; 84.0; 96.0; 102.0; 93.0; 92.0]
[123.0; 238.0; 261.0; 203.0; 356.0; 311.0; 259.0; 310.0; 192.0; 267.0]


In [19]:
function get_ab(θ::Array{Float64}, 
                bound::Array{Float64}, 
                σ::Float64, 
                ξ::Array{Float64}, 
                root::Array{Float64}, 
                gradient_root::Array{Float64},
                cv::Bool)
    d = length(ξ)
    if cv 
        a = [pos((θ[i]*gradient_root[i])[1]) + (bound[i]+1/σ^2)*norm(ξ-root) for i in 1:d]
        b = √d*(bound + 1/σ^2)
        else round
        a = bound + abs.(ξ)/σ^2 
        b = ones(d)/σ^2
    end
    return a, b
end

get_ab (generic function with 1 method)

In [20]:
bound = bounds(X,true,nothing);

In [26]:
function ZZ_logistic(X, y, max_attempts, ξ0, mb_size, root, σ, A, weights, replace, cv)

    d, Nobs = size(X) 
    m = size(A,1)
    if weights != nothing 
        for i in 1:d 
            @assert abs.(1-sum(weights[i,:]) < 1e-7)
        end
    end
    
    # Initialise everything:
    bouncing_times = []
    push!(bouncing_times, 0.)
    skeleton_points = zeros(m, 1000)
    skeleton_points[:,1] = A*copy(ξ0)
    ξ = copy(ξ0)
    θ = 2rand(Binomial(),d)-1.
    t, switches = 0, 0
    
    gradient_root = [derivative_ll(X, y, root, dim, Array(1:Nobs)) + derivative_gaussian_prior(root, dim, σ)  
                     for dim in 1:d]
    bound = bounds(X, cv, weights)
    
    # run sampler:
    @showprogress for attempt in 1:max_attempts
        a, b = get_ab(θ, bound, σ, ξ, root, gradient_root, cv)
        event_times = [get_event_time(a[i], b[i]) for i in 1:d] 
        τ, i0 = findmin(event_times)                
        t += τ 
        ξ_new = ξ + τ*θ
        rate_ = rate(X, y, ξ_new, i0, mb_size, θ, root, gradient_root, replace, cv, weights)
        actual_bound = a[i0] + b[i0]*τ
        alpha = rate_/actual_bound
        if alpha > 1 
            print("Error, rate larger than bound \n")
            break
        elseif rand() < alpha
            θ[i0] *= -1
            switches += 1
            skeleton_points[:,switches+1] = A*ξ_new
            push!(bouncing_times, t)
        end   
        if switches == size(skeleton_points,2) - 1 
            skeleton_points = extend_skeleton_points(skeleton_points)
        end
        ξ = copy(ξ_new)
    end
    
    # Return stuff:
    print(signif(100*switches/max_attempts,2),"% of switches accepted \n")
    return hcat(skeleton_points[:,1:switches+1], A*ξ), push!(bouncing_times, t)
end

ZZ_logistic (generic function with 2 methods)

In [39]:
d, N = 2, 100
X = rand(Normal(), d, N)
# X[1,:] = 1
y = rand(Binomial(),N);

In [45]:
σ = 1.
mb_size = 10
replace = true
A = eye(d)
max_attempts = 10^6
ξ0 = rand(d)
cv = false
root = zeros(d)
weights = nothing

In [46]:
s, b = ZZ_logistic(X, y, max_attempts, ξ0, mb_size, root, σ, A, weights, replace, cv);

[32mProgress:  96%|███████████████████████████████████████  |  ETA: 0:00:00[39m

2.7% of switches accepted 


[32mProgress:  99%|████████████████████████████████████████ |  ETA: 0:00:00[39m[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:03[39m


In [47]:
b[end]

1914.9188594410289

In [48]:
samples = extract_samples(s, b, 0.1)
[compute_configT(samples, dim, X, y, N, σ) for dim in 1:d]

2-element Array{Float64,1}:
  0.947701 
 -0.0263316