# In [187]: (notebook cell marker, kept as a comment so the file parses as Julia)
# using CSV, DataFrames, Distributions, Plots, BayesNets, LinearAlgebra, StatsBase
using Distributions, LinearAlgebra
#Bayesian Neural Network

"""
    self(topo, train_data, test_data, lr)

Immutable bundle of the settings shared by the network functions in this file.

# Fields
- `topo`: layer sizes; the functions in this file read `topo[1]` (inputs),
  `topo[2]` (hidden units) and `topo[3]` (outputs).
- `train_data`: training data slot (not read by any function visible here).
- `test_data`: test data slot (not read by any function visible here).
- `lr`: step size used by `backward_pass` when updating weights and biases.
"""
struct self{TopoT, TrainT, TestT, RateT}
    topo::TopoT
    train_data::TrainT
    test_data::TestT
    lr::RateT
end

# Example network: 5 inputs, 10 hidden units, 5 outputs.
# The train/test slots are filled with the same vector as the topology and the
# learning rate is 5 -- presumably placeholders; TODO confirm real values.
neural_net = self(
    [5, 10, 5],  # topo
    [5, 10, 5],  # train_data
    [5, 10, 5],  # test_data
    5,           # lr
);

"""
    weight(weight_layer_1, bias_1, weight_layer_2, bias_2)

Immutable record holding the four parameter arrays of a two-layer network.

No function visible in this file constructs or reads it; `weight_output`
returns the same four arrays as a plain vector instead.
"""
struct weight{W1, B1, W2, B2}
    weight_layer_1::W1
    bias_1::B1
    weight_layer_2::W2
    bias_2::B2
end

"""
    weight_output(neural_net)

Draw random initial parameters for the two-layer network described by
`neural_net.topo` and return them as `[W1, b1, W2, b2]`.

Every entry is sampled from `Normal(0, 1)` and then divided by a square-root
scale factor. Shapes: `W1` is `topo[1] x topo[2]`, `b1` is `1 x topo[2]`,
`W2` is `topo[2] x topo[3]`, `b2` is `1 x topo[3]`.
"""
function weight_output(neural_net)
    n_in = neural_net.topo[1]
    n_hidden = neural_net.topo[2]
    n_out = neural_net.topo[3]
    std_normal = Normal(0, 1)

    # Input -> hidden weights, scaled by sqrt(number of inputs).
    weight_layer_1 = rand(std_normal, (n_in, n_hidden)) ./ sqrt(n_in)
    # Hidden-layer bias, scaled by sqrt(number of hidden units).
    bias_1 = rand(std_normal, (1, n_hidden)) ./ sqrt(n_hidden)
    # Hidden -> output weights, scaled by sqrt(number of hidden units).
    weight_layer_2 = rand(std_normal, (n_hidden, n_out)) ./ sqrt(n_hidden)
    # Output-layer bias; note it is also scaled by sqrt(number of hidden units).
    bias_2 = rand(std_normal, (1, n_out)) ./ sqrt(n_hidden)

    return [weight_layer_1, bias_1, weight_layer_2, bias_2]
end

# Randomly initialised parameters [W1, b1, W2, b2] for the example network.
layer = weight_output(neural_net);

"""
    compute_output(neural_net)

Return a `1 x topo[3]` matrix of zeros for the output layer.

A `1 x topo[2]` zero buffer for the hidden layer is allocated as well, but it
is never read or returned.
"""
function compute_output(neural_net)
    n_hidden = neural_net.topo[2]
    n_out = neural_net.topo[3]
    hidden_output = fill(0.0, 1, n_hidden)  # allocated only; not used below
    last_output = fill(0.0, 1, n_out)
    return last_output
end

"""
    sigmoid(x)

Apply the logistic function `1 / (1 + exp(-x))` element-wise.

Accepts a scalar or an array; the result has the same shape as `x`.
"""
function sigmoid(x)
    return @. 1 / (1 + exp(-x))
end

"""
    sampleEr(neural_net, actualout)

Return the squared error between `compute_output(neural_net)` and `actualout`,
summed over all entries and divided by the number of outputs `topo[3]`.
"""
function sampleEr(neural_net, actualout)
    n_out = neural_net.topo[3]
    residual = compute_output(neural_net) .- actualout
    squared_total = sum(residual .^ 2)
    return squared_total / n_out
end

"""
    forward_pass(X, neural_net, layer)

Propagate one sample through the two-layer network.

`X` is reshaped to a `1 x topo[1]` row. `layer` holds `[W1, b1, W2, b2]`; each
layer computes `sigmoid(input * W .+ b)`.

Returns the tuple `(hid_out, last_out)` of hidden and output activations.
"""
function forward_pass(X, neural_net, layer)
    row_input = reshape(X, (1, neural_net.topo[1]))
    w_hidden, b_hidden = layer[1], layer[2]
    w_out, b_out = layer[3], layer[4]

    hid_out = sigmoid(row_input * w_hidden .+ b_hidden)
    last_out = sigmoid(hid_out * w_out .+ b_out)
    return hid_out, last_out
end
    
"""
    backward_pass(inp, desired, neural_net, layer)

Perform one backpropagation step for a single sample and update `layer`
(`[W1, b1, W2, b2]`) in place.

# Arguments
- `inp`: input sample with `topo[1]` elements.
- `desired`: target output with `topo[3]` elements.
- `neural_net`: provides `topo` (layer sizes) and `lr` (step size).
- `layer`: parameter arrays; mutated by this call.

# Returns
The same `layer` object, after the update.
"""
function backward_pass(inp, desired, neural_net, layer)
    n_in = neural_net.topo[1]
    n_out = neural_net.topo[3]
    lr = neural_net.lr

    # Activations must be computed from the sample being trained on (`inp`).
    hid_out, last_out = forward_pass(inp, neural_net, layer)

    # delta = (target - output) * sigmoid'(z), with sigmoid'(z) = a * (1 - a).
    out_delta = (reshape(desired, (1, n_out)) .- last_out) .* (last_out .* (1 .- last_out))
    # Back-propagate through W2 *before* W2 is modified below.
    hid_delta = out_delta * transpose(layer[3]) .* (hid_out .* (1 .- hid_out))

    # Hidden -> output layer: W2[i, j] += lr * out_delta[j] * hid_out[i].
    layer[3] .+= lr .* out_delta .* transpose(hid_out)
    # forward_pass adds the bias (z = a * W .+ b), so the bias moves in the
    # same direction as the weights: b += lr * delta.
    vec(layer[4]) .+= lr .* vec(out_delta)

    # Input -> hidden layer: W1[i, j] += lr * hid_delta[j] * inp[i].
    layer[1] .+= lr .* hid_delta .* reshape(inp, n_in, 1)
    vec(layer[2]) .+= lr .* vec(hid_delta)

    return layer
end

"""
    decode(w, neural_net, layer)

Unpack the flat parameter vector `w` into `layer` (`[W1, b1, W2, b2]`).

`w` is read in the order `W1`, `W2`, `b1`, `b2`, which is the order `encode`
writes. Weight matrices are filled column by column.

# Arguments
- `w`: flat parameters (vector or `1 x N` matrix) holding at least
  `topo[1]*topo[2] + topo[2]*topo[3] + topo[2] + topo[3]` entries.
- `neural_net`: provides the layer sizes in `topo`.
- `layer`: container whose four slots are overwritten.

# Returns
The same `layer` object, with `W1` (`topo[1] x topo[2]`), `b1` (`1 x topo[2]`),
`W2` (`topo[2] x topo[3]`) and `b2` (`1 x topo[3]`) replaced by copies of the
corresponding slices of `w`.
"""
function decode(w, neural_net, layer)
    n_in = neural_net.topo[1]
    n_hidden = neural_net.topo[2]
    n_out = neural_net.topo[3]

    flat = vec(w)

    # Cumulative end offsets of each segment inside `flat`.
    w1_end = n_in * n_hidden
    w2_end = w1_end + n_hidden * n_out
    b1_end = w2_end + n_hidden
    b2_end = b1_end + n_out

    layer[1] = reshape(flat[1:w1_end], (n_in, n_hidden))
    layer[3] = reshape(flat[(w1_end + 1):w2_end], (n_hidden, n_out))
    # Biases are kept as 1 x n rows, the same shape weight_output produces.
    layer[2] = reshape(flat[(w2_end + 1):b1_end], (1, n_hidden))
    layer[4] = reshape(flat[(b1_end + 1):b2_end], (1, n_out))
    return layer
end

"""
    encode(neural_net, layer)

Flatten `layer` (`[W1, b1, W2, b2]`) into a single `1 x N` row matrix.

The row is laid out as `W1`, `W2`, `b1`, `b2`, with each weight matrix
flattened column by column. `neural_net` is accepted but not read.
"""
function encode(neural_net, layer)
    as_row(m) = reshape(vec(m), 1, :)
    flat_w1 = as_row(layer[1])
    flat_w2 = as_row(layer[3])
    return hcat(flat_w1, flat_w2, layer[2], layer[4])
end

"""
    langevin_gradient(data, w, depth, neural_net, layer)

Run `depth` passes of per-sample backpropagation (SGD) over `data`, starting
from the flat parameters `w`, and return the updated flat parameters.

# Arguments
- `data`: 2-D table with one sample per row; the first `topo[1]` columns are
  the inputs and the next `last(topo)` columns are the targets.
- `w`: flat parameter vector, unpacked with `decode`.
- `depth`: number of full passes over `data`.
- `neural_net`: provides `topo` and the settings used by `backward_pass`.
- `layer`: parameter container that is overwritten and updated.

# Returns
The updated parameters, re-flattened with `encode`.
"""
function langevin_gradient(data, w, depth, neural_net, layer) #BP with SGD
    layer = decode(w, neural_net, layer)

    n_in = neural_net.topo[1]
    n_out = last(neural_net.topo)
    input_cols = 1:n_in
    target_cols = (n_in + 1):(n_in + n_out)
    n_samples = size(data, 1)

    for _ in 1:depth
        for row in 1:n_samples
            inp = data[row, input_cols]
            desired = data[row, target_cols]
            # backward_pass runs its own forward pass, so none is needed here.
            layer = backward_pass(inp, desired, neural_net, layer)
        end
    end

    return encode(neural_net, layer)
end

"""
    evaluate_proposal(data, w, neural_net, layer)

Compute the network output for every row of `data` using the flat
parameters `w`.

`w` is unpacked into `layer` with `decode`; the first `topo[1]` columns of
each row are fed through `forward_pass`.

Returns a `size(data, 1) x topo[3]` matrix whose row `i` is the output for
sample `i`.
"""
function evaluate_proposal(data, w, neural_net, layer)
    layer = decode(w, neural_net, layer)
    n_rows = size(data)[1]
    input_cols = 1:neural_net.topo[1]

    fx = zeros(n_rows, neural_net.topo[3])
    for row in 1:n_rows
        # Only the output-layer activations are kept.
        _, prediction = forward_pass(data[row, input_cols], neural_net, layer)
        fx[row, :] = prediction
    end
    return fx
end


    

            
            
            

# Out: langevin_gradient (generic function with 1 method)