# RBM and CRBM

Objective: Implement CRBM in Julia for time series analysis

In [1]:
# Import Distributions to generate the W matrix of the RBM
using Distributions
using MNIST
using Benchmarks


In [2]:
type RBM{T <: Real}
    n_vis::Int
    n_hid::Int
    W::Matrix{T}         
    vis_bias::Vector{T}     
    hid_bias::Vector{T}   
    trained::Bool
end

function Base.show{T}(io::IO, rbm::RBM{T})
    n_vis = size(rbm.vis_bias, 1)
    n_hid = size(rbm.hid_bias, 1)
    trained = rbm.trained
    print(io, "RBM{$T}(n_vis=$n_vis, n_hid=$n_hid, trained=$trained)")
end

In [3]:
function sigmoid(vector::Array{Float64})
    return 1./(1 + exp(-vector))
end

sigmoid (generic function with 1 method)

In [4]:
function initialize_RBM(n_vis, n_hid, sigma, T)
    
    return RBM{T}( n_vis,                                 # num visible units 
                   n_hid,                                 # num hidden unnits
                   rand(Normal(0,sigma), n_hid, n_vis),  # weight matrix
                   zeros(n_vis),                          # visible vector  
                   zeros(n_hid),                          # Hidden vector
                   false)                                 # trained
end

initialize_RBM (generic function with 1 method)

In [5]:
rbm = initialize_RBM(784, 100, 0.01, Float64)

RBM{Float64}(n_vis=784, n_hid=100, trained=false)

In [6]:
size(rbm.W)

(100,784)

In [7]:
X_train, y_train = MNIST.traindata()
X_test, y_test = MNIST.testdata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[7.0,2.0,1.0,0.0,4.0,1.0,4.0,9.0,5.0,9.0  …  7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0])

In [8]:
function contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
        
    batch_size = size(Xbatch)[2]
    Delta_W = zeros(size(rbm.W))
    Delta_b = zeros(size(rbm.vis_bias))
    Delta_c = zeros(size(rbm.hid_bias))
    
    xneg = zeros(size(rbm.vis_bias))
    hneg = similar(rbm.hid_bias)
    b1 = similar(rbm.W * Xbatch[:,1])
    b2 = similar(rbm.W' * hneg)
    ehp = similar(rbm.hid_bias)
    ehn = similar(rbm.hid_bias)
        
    @inbounds for i in 1:batch_size
        x =  @view Xbatch[:,i]
        xneg = @view Xbatch[:,i]

        for k in 1:K
            hneg .= sigmoid(rbm.W * xneg .+ rbm.hid_bias) .> rand.()
            At_mul_B!(b2, rbm.W, hneg)
            xneg .= sigmoid(b2 .+ rbm.vis_bias) .> rand.()         
        end

        A_mul_B!(b1, rbm.W, x)
        ehp .= sigmoid(b1 .+ rbm.hid_bias)
        A_mul_B!(b1, rbm.W, xneg)
        ehn .= sigmoid(b1 .+ rbm.hid_bias)

        Delta_W .+= lr .* (ehp .* x' .- ehn .* xneg')
        Delta_b .+= lr .* (x .- xneg)
        Delta_c .+= lr .* (ehp .- ehn)

    end

    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_b ./ batch_size;
    rbm.hid_bias .+= Delta_c ./ batch_size;

    return 
end

contrastive_divergence_K (generic function with 1 method)

In [9]:
X_batch = X_train[:,1:25]

@benchmark contrastive_divergence_K(X_batch, rbm, 1, 0.01)
#@time contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 27.76 ms [22.54 ms, 32.98 ms]
Proportion of time in GC: 8.20% [3.49%, 12.91%]
        Memory allocated: 78.83 mb
   Number of allocations: 3594 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 3.90 s


In [10]:
size(X_train), size(X_batch)

((784,60000),(784,25))

# Fit RBM

In [11]:
function fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    print("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            contrastive_divergence_K(Xbatch, rbm, K, lr)
            
        end
        print("\nepoch ", epoch, "  time epoch:", toq())
    end
    rbm.trained = true
end

fit_CDK (generic function with 1 method)

In [12]:
n_epochs = 1
batch_size = 200
K = 1
lr = 0.01

@time fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:300

epoch 1  time epoch:109.878847006110.166125 seconds (8.71 M allocations: 180.974 GB, 6.15% gc time)


true

# vectorized cdk

In [32]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    for k in 1:K
        Hneg = sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Xneg = sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
    end
       
    Ehp = sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn = sigmoid( rbm.W * Xneg .+ rbm.hid_bias)

    Delta_W = lr*( Ehp * Xbatch' -  Ehn *  Xneg')
    Delta_vis_bias = sum(lr .* (Xbatch .- Xneg), 2)[:]
    Delta_hid_bias = sum(lr .* (Ehp - Ehn), 2)[:]
    
    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_vis_bias ./ batch_size;
    rbm.hid_bias .+= Delta_hid_bias ./ batch_size;
    
end



vec_contrastive_divergence_K (generic function with 1 method)

In [33]:
X_batch = X_train[:,1:25]
@benchmark vec_contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 26.25 ms [20.48 ms, 32.02 ms]
Proportion of time in GC: 0.64% [0.00%, 1.36%]
        Memory allocated: 5.36 mb
   Number of allocations: 173 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 2.97 s


In [15]:
function vec_fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    println("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            vec_contrastive_divergence_K(Xbatch, rbm, K, lr)
        end
        print("\n\nepoch ", epoch, "  time epoch:", toq(), "\n")
    end
    rbm.trained = true
end

vec_fit_CDK (generic function with 1 method)

In [35]:
n_epochs = 1
batch_size = 1000
K = 1
lr = 0.01

@time vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:30



epoch 1  time epoch:29.837511542
 29.838367 seconds (44.19 k allocations: 4.163 GB, 6.65% gc time)


true