In [None]:
#Libraries
using Gen
using PyPlot
using Distributions
using LinearAlgebra
using Flux
using Random
using Distances
include("hmc_mod.jl")
include("helper_functions.jl")
include("rj_proposals_nodes.jl")
include("NUTS.jl");

In [None]:
#------------------------------------
#Hyperparameters and Helper Functions
#------------------------------------

#Network goal: "classifier" is active; swap the comment to select "interpolator"
network = "classifier"
#network = "interpolator"

#Data hyperparameters
n = 20         #Samples per mode (classifier)
m = 4          #Number of modes (classifier)
d = 2          #Input dimension
N = n * m      #Total number of samples
σₐ = 0.03      #Mode variance (classifier)
bound = 0.5    #Forwarded to real_data_classifier when the data are generated

#Network hyperparameters

#Node hyperparameters
k_range = 4                  #Maximum number of neurons per layer
k_list = collect(1:k_range)  #Candidate layer widths 1, 2, ..., k_range
k_real = 2

#NUTS
Δmax = 1000;

In [None]:
#Data
#Draw the raw inputs and their mode labels. The per-mode sample count `n` and the
#number of modes `m` are passed directly (rather than re-derived as N/4 and a
#literal 4) so this call stays consistent when the hyperparameters change.
x_raw, classes = real_data_classifier(n, m, bound, σₐ);
#Fold the mode labels into two classes: for positive integer labels,
#odd labels become 1 and even labels become 2.
classes = [(i+1) % 2 + 1 for i in classes]
y_real = classes

plot_data_classifier(x_raw,classes)
#The network consumes the transposed data matrix
x = transpose(x_raw)
size(x)
typeof(x)

In [None]:
#Bayesian Neural Net
"""
    G(x, trace)

Evaluate the neural network whose architecture and parameters are stored in `trace`.

`trace` is indexed with the addresses `(:k,i)`, `(:W,i)` and `(:b,i)`, so anything that
supports that indexing can be passed. One hidden layer with the sigmoid activation `σ`
is applied, followed by a single-output `Dense` layer without an activation.

`x` is handed unchanged to the first `Dense` layer — presumably a features × samples
matrix (TODO confirm against callers).
"""
function G(x, trace)
    n_hidden = 1 #trace[:l]
    widths = [trace[(:k, layer)] for layer in 1:n_hidden]
    act = σ

    # Hidden layers: rebuild each Dense layer from the flat parameter vectors
    h = x
    for layer in 1:n_hidden
        in_dim, out_dim = layer_unpacker(layer, n_hidden, widths)
        weights = reshape(trace[(:W, layer)], out_dim, in_dim)
        bias = reshape(trace[(:b, layer)], trace[(:k, layer)])
        h = Dense(weights, bias, act)(h)
    end

    # Output layer: one unit, no activation (raw scores)
    out_weights = reshape(trace[(:W, n_hidden + 1)], 1, widths[n_hidden])
    out_bias = reshape(trace[(:b, n_hidden + 1)], 1)
    return Dense(out_weights, out_bias)(h)
end;

#Generative model for the Bayesian neural network classifier.
#
#Traced addresses:
#  (:k,i)           width of layer i (uniform over k_list for the hidden layer,
#                   always 1 for the output layer)
#  :τ₁, :τ₂         gamma-distributed hyperparameters; their reciprocals are used as
#                   the diagonal covariance entries of the hidden / output parameters
#  (:μ,i), (:μb,i)  means of the weight and bias vectors of layer i
#  (:W,i), (:b,i)   weight and bias vectors of layer i
#  (:y,j)           class label (1 or 2) of the j-th input
#
#Returns the sigmoid network outputs for `x`, i.e. the probability the model assigns
#to label 2 for every input.
@gen function classifier(x)

    #Choicemap mirroring the sampled architecture and parameters so that the
    #network G can be evaluated on them below
    obs = choicemap()::ChoiceMap

    #Number of hidden layers (fixed)
    l = 1

    #Layer widths: hidden widths are uniform over k_list, the output width is always 1
    k = [Int(0) for i=1:l+1]
    for i=1:l
        k[i] = @trace(categorical(fill(1/length(k_list), length(k_list))), (:k,i))
        obs[(:k,i)] = k[i]
    end
    k[l+1] = @trace(categorical([1.0]), (:k,l+1))
    obs[(:k,l+1)] = k[l+1]

    ######################################
    #New hyperparameter schedule - Jan 20#
    ######################################

    #Standard Deviations
    τ₁ ~ gamma(100,0.001) #Hidden weights and biases
    τ₂ ~ gamma(100*k[1],0.001) #Output weights and biases
    σ₁ = 1/τ₁
    σ₂ = 1/τ₂

    #Sample weight and parameter vectors
    W = [zeros(k[i]) for i=1:l+1]
    b = [zeros(k[i]) for i=1:l+1]
    μ = [zeros(k[i]) for i=1:l+1]
    μb = [zeros(k[i]) for i=1:l+1]

    for i=1:l+1
        #Layer i maps fan_in inputs onto k[i] units. Because k[l+1] is always 1, the
        #output layer receives k[l] weights and a single bias.
        fan_in = i == 1 ? d : k[i-1]
        n_weights = Int(fan_in * k[i])
        n_biases = k[i]

        #Hidden layers use σ₁, the output layer uses σ₂
        s = i <= l ? σ₁ : σ₂

        #Weights
        μ[i] = @trace(mvnormal(zeros(n_weights), Diagonal(ones(Int, n_weights))), (:μ,i))
        W[i] = @trace(mvnormal(μ[i], Diagonal(fill(s, n_weights))), (:W,i))
        obs[(:W,i)] = W[i]

        #Biases
        μb[i] = @trace(mvnormal(zeros(n_biases), Diagonal(ones(Int, n_biases))), (:μb,i))
        b[i] = @trace(mvnormal(μb[i], Diagonal(fill(s, n_biases))), (:b,i))
        obs[(:b,i)] = b[i]
    end

    #Return Network Scores for X
    scores = G(x,obs)
    scores = Flux.σ.(scores)

    #Logistic Regression Likelihood
    #The sampled labels live only in the trace. They must not be written back into
    #`scores`: doing so replaced every score with its label before it was returned.
    for j=1:length(scores)
        @trace(categorical([1-scores[j],scores[j]]), (:y,j))
    end

    return scores
end;
#Observation choicemap: constrain every label address (:y,i) to its observed class
obs_master = choicemap()::ChoiceMap
for (i, label) in enumerate(classes)
    obs_master[(:y,i)] = label
end
obs = obs_master;

#Draw one trace that is consistent with the observations and show its hyperparameters
best_trace = first(generate(classifier, (x,), obs))

println(best_trace[:τ₁])
println(best_trace[:τ₂])

#Smoke test: run the model unconstrained and count label matches against the data
test_scores = classifier(x)
test_labels = data_labeller(test_scores)
test_acc = count(classes[i] == test_labels[i] for i in 1:length(classes))

In [None]:
#----------------
#Test Likelihood
#----------------
#Running records of the random search (filled in by `likelihood` and the loop below)
scores = Any[]
accs = Any[]
ks = Any[]
best_ks = Any[]
best_traces = Any[]

#Starting point of the search: one trace constrained to the observations
best_trace = first(generate(classifier, (x,), obs))
best_acc = 0
best_score = get_score(best_trace)
best_pred_y = G(x, best_trace)
best_pred_labels = data_labeller(best_pred_y)
best_k = best_trace[(:k,1)]
"""
    likelihood(best_trace, best_acc, best_score, best_k)

Draw one new trace of `classifier` constrained to `obs_master`, measure how many
entries of `classes` its network labels correctly, and return the updated running best
as `(best_trace, best_acc, best_score, best_k)`.

The running best is replaced only when the new trace is strictly more accurate.

Side effects: appends to the global vectors `best_ks` (running best width), `scores`
(score of the new trace) and `accs` (accuracy of the new trace). Reads the globals `x`,
`classes` and `obs_master`.
"""
function likelihood(best_trace, best_acc, best_score, best_k)
    #obs[(:k,1)] = 2
    (trace,) = generate(classifier, (x,), obs_master)

    pred_y = G(x, trace)
    pred_labels = data_labeller(pred_y)
    acc = count(classes[i] == pred_labels[i] for i in eachindex(classes))
    score = get_score(trace)

    if acc > best_acc
        #The former `best_pred_y = pred_y` only created an unused local here (it never
        #touched the global of the same name), so it has been dropped.
        best_acc = acc
        best_score = score
        best_trace = trace
        best_k = trace[(:k,1)]
    end
    push!(best_ks, best_k)
    push!(scores, score)
    push!(accs, acc)

    return (best_trace, best_acc, best_score, best_k)
end;

#Repeatedly draw traces and keep the most accurate one.
#`likelihood` already appends the running best width to `best_ks`, so it is not pushed
#a second time here (doing so recorded every value twice and left `best_ks` twice as
#long as `scores` and `accs`).
for i=1:10000
    best_trace, best_acc, best_score, best_k = likelihood(best_trace, best_acc, best_score, best_k)
end

#Score of every drawn trace against its classification accuracy
PyPlot.scatter(accs, scores)
plt.title("Comparing Classifier Accuracy to Log Likelihood")
plt.xlabel("Classifier Accuracy")
plt.ylabel("Log Likelihood")
plt.ylim(-100,1)
plt.legend()
#print(best_ks)

In [None]:
#New Helper Functions
include("hmc_mod.jl")

#Fresh, untyped containers for the sampler run below:
#trace scores, visited traces and first-layer widths
scores, traces, ks = Any[], Any[], Any[]

"""
    propose_hyperparameters(trace)

Propose new values for the hyperparameters `:τ₁` and `:τ₂` with `regenerate` and
return the proposed trace if `log(rand())` falls below the returned weight; otherwise
return `trace` unchanged.
"""
function propose_hyperparameters(trace)
    hyper_selection = select()
    for addr in (:τ₁, :τ₂)
        push!(hyper_selection, addr)
    end

    candidate, log_weight, _ = regenerate(trace, hyper_selection)
    return log(rand()) < log_weight ? candidate : trace
end;

"""
    propose_parameters(trace)

Propose new values for the means, weights and biases of both layers — addresses
`(:μ,i)`, `(:μb,i)`, `(:W,i)` and `(:b,i)` for `i = 1, 2` — with `regenerate`, and
return the proposed trace if `log(rand())` falls below the returned weight; otherwise
return `trace` unchanged.
"""
function propose_parameters(trace)
    param_selection = select()
    for layer in 1:2, name in (:μ, :μb, :W, :b) #Number of Layers = 1 hidden + 1 output
        push!(param_selection, (name, layer))
    end

    candidate, log_weight = regenerate(trace, param_selection)
    return log(rand()) < log_weight ? candidate : trace
end;

"""
    hmc_parameters(trace)

Run one `hmc` move over the means, weights and biases of both layers (addresses
`(:μ,i)`, `(:μb,i)`, `(:W,i)`, `(:b,i)` for `i = 1, 2`) and return the resulting trace.
The acceptance flag returned by `hmc` is discarded. The global `obs` is passed as the
`observations` keyword.
"""
function hmc_parameters(trace)
    selection = select()
    for layer in 1:2, name in (:μ, :μb, :W, :b) #Number of Layers = 1 hidden + 1 output
        push!(selection, (name, layer))
    end

    # Settings forwarded to `hmc` as its `L` and `eps` keywords
    n_steps = 10
    step = 0.1

    moved, _ = hmc(trace, selection; L=n_steps, eps=step, check=false, observations=obs)
    return moved
end

"""
    node_parameter(trace)

Propose a move that may change the layer widths and accept or reject it.

Steps, in order:
1. `hmc_mod` on the parameter addresses of `trace`,
2. `regenerate` on the width addresses `(:k,i)` together with the parameter addresses,
3. `hmc_mod` again on the parameter addresses of the regenerated trace.

The regenerate weight is printed but is not part of the acceptance score, which is the
score difference between the final and the initial trace plus the two values returned
by `hmc_mod`. Returns the final trace on acceptance and `trace` otherwise.
"""
function node_parameter(trace)
    observations = obs_master

    # param_selection: means, weights and biases of both layers
    # node_selection:  the same addresses plus the layer widths (:k,i)
    param_selection = select()
    node_selection = select()
    for layer in 1:2 #Number of Layers = 1 hidden + 1 output
        push!(node_selection, (:k, layer))
        for name in (:μ, :μb, :W, :b)
            push!(node_selection, (name, layer))
            push!(param_selection, (name, layer))
        end
    end

    L = 100
    eps = 0.2

    trace_tilde, hmc_score = hmc_mod(
        trace, param_selection; L=L, eps=eps, check=false, observations=observations
    )
    trace_prime, q_weight = regenerate(trace_tilde, node_selection)
    trace_star, hmc_score2 = hmc_mod(
        trace_prime, param_selection; L=L, eps=eps, check=false, observations=observations
    )

    println(q_weight)

    score_gain = get_score(trace_star) - get_score(trace)
    across_score = score_gain + hmc_score + hmc_score2 #+ q_weight

    return rand() < exp(across_score) ? trace_star : trace
end

#Initialise the chain from a trace constrained to the observations
trace = first(generate(classifier, (x,), obs))
#Single trial move; its result is kept separately and does not advance the chain
trace2 = node_parameter(trace)

#Main loop: one width/parameter move followed by one hyperparameter move per sweep
for sweep in 1:1000
    trace = node_parameter(trace)
    trace = propose_hyperparameters(trace)
    push!(scores, get_score(trace))
    push!(traces, trace)
    push!(ks, trace[(:k,1)])
end

In [31]:
#Sanity check: exponentiating the log-density should reproduce the density.
#Both values are evaluated for a standard normal at 10.0.
a = Distributions.logpdf(Normal(0, 1), 10.0)
b = pdf(Normal(0, 1), 10.0)

foreach(println, (exp(a), b))

7.69459862670641e-23
7.69459862670642e-23


In [None]:
#-------
#K Plot
#-------
#Line plot of the first-layer widths collected in `ks`
PyPlot.plot(ks)

In [None]:
#Restart the chain; print the score before and after one hyperparameter move
trace = first(generate(classifier, (x,), obs))
println(get_score(trace))
trace = propose_hyperparameters(trace)
println(get_score(trace))

#Discard the earlier records
scores = Any[]
traces = Any[]

#One width/parameter move followed by one hyperparameter move per sweep
for sweep in 1:1000
    trace = node_parameter(trace)
    trace = propose_hyperparameters(trace)
    push!(scores, get_score(trace))
    push!(traces, trace)
end

PyPlot.plot(scores)

In [None]:
#-----------
#Score plot
#-----------

#Recompute the score of every stored trace and plot the series
scores = map(get_score, traces)
PyPlot.plot(scores)
plt.ylim(-200, 0)

In [None]:
#---------------
#Accuracy plot
#---------------

#Number of correctly labelled points for every stored trace
accs = []
for i=1:length(traces)
    trace = traces[i]
    pred_y = G(x,trace)
    pred_labels = data_labeller(pred_y)

    #Separate index `j` so the comprehension does not shadow the loop index
    acc = sum([classes[j] == pred_labels[j] for j=1:length(classes)])
    push!(accs,acc)
end

plot(accs)
#Mean number of correct labels over all stored traces
println(sum(accs)/length(accs))
plt.title("RJMCMC Accuracy: XOR Classifier")
plt.xlabel("Iteration")
#The total is taken from the data; the previous hard-coded "200" did not match
#the N = n*m samples configured in this notebook
plt.ylabel("# Classified Correctly (out of $(length(classes)))");

In [None]:
#---------------
#Classifier plot
#---------------

"""
    plot_grid(data, scores, alpha=1.0)

Scatter the first two columns of `data`, coloured by `scores` with the "PRGn" colour
map and drawn with opacity `alpha`. Returns whatever `PyPlot.scatter` returns.
"""
function plot_grid(data, scores, alpha=1.0)
    xs = data[:, 1]
    ys = data[:, 2]
    #PyPlot.colorbar()
    return PyPlot.scatter(xs, ys; c=scores, alpha=alpha, cmap="PRGn")
end

"""
    tracegrid(traces, samples=500, low=-1.0, high=1.0; burnin=100)

Plot the averaged sigmoid network output over a regular 100 × 100 grid on
`[low, high]²`.

`samples` traces are drawn uniformly at random (with replacement) from
`traces[burnin:end]`, the network `G` is evaluated on the grid for each of them, and the
mean output is passed to `plot_grid`. If fewer than `burnin` traces are available, only
the last trace is used.

Throws `ArgumentError` when `traces` is empty.
"""
function tracegrid(traces, samples=500, low=-1.0, high=1.0; burnin=100)
    isempty(traces) && throw(ArgumentError("traces must not be empty"))

    d = 2
    n = 100
    r = range(low, high, length = n)

    #All grid points as an (n^d × d) matrix, plus its transpose for the network
    iter = Iterators.product((r for _ in 1:d)...)
    grid = vec([collect(p) for p in iter])
    grid_raw = reduce(hcat, getindex.(grid, c) for c in eachindex(grid[1]))
    grid2 = transpose(grid_raw)
    z_master = zeros(size(grid_raw, 1))

    #Sample from the index RANGE after burn-in. The previous `rand((100, length(traces)))`
    #picked one of the two tuple elements, so only two traces were ever averaged.
    first_idx = min(burnin, length(traces))
    for _ in 1:samples
        j = rand(first_idx:length(traces))
        z = Flux.σ.(G(grid2, traces[j]))[1, :]
        z_master .+= z ./ samples
    end
    return plot_grid(grid_raw, z_master)
end

#Averaged prediction surface of the stored traces ...
tracegrid(traces)
#... followed by the data-set plot
plot_data_classifier(x_raw, classes)