# RBM and CRBM

Objective: Implement CRBM in Julia for time series analysis

In [1]:
# Import Distributions to generate the W matrix of the RBM
using Distributions

In [2]:
# Import MNIST dataset for experimenting
using MNIST

In [3]:
X_train, y_train = MNIST.traindata()
X_train_rows = X_train';
X_train_rows = X_train_rows[1:42000,:];
X_train_cols = X_train[:,1:42000];

In [4]:
# Data
size(X_train_rows), size(y_train)

((42000,784),(60000,))

### Define a type RBM

In [5]:
type RBM_rows{T <: Real}
    W::Matrix{T}
    vis_bias::Vector{T}
    hid_bias::Vector{T}
    n_vis::Int32
    n_hid::Int32
    trained::Bool
end

The following function allow us to define what will be printed once we type RBM in our julia terminal (or notebook)

In [6]:
function Base.show{T}(io::IO, rbm::RBM_rows{T})
    n_vis = size(rbm.vis_bias, 1)
    n_hid = size(rbm.hid_bias, 1)
    trained = rbm.trained
    print(io, "RBM{$T}(n_vis=$n_vis, n_hid=$n_hid, trained=$trained)")
end

The following function is used to instanciate an RBM

In [7]:
function initializeRBM_rows(n_vis::Int64, n_hid::Int64; sigma=0.01, T=Float64)
    
    return RBM_rows{T}( rand(Normal(0,sigma),n_vis, n_hid),  # weight matrix
                   zeros(n_vis),                          # visible vector  
                   zeros(n_hid),                          # Hidden vector
                   n_vis,                                 # num visible units 
                   n_hid,                                 # num hidden unnits
                   false)                                 # trained
end

initializeRBM_rows (generic function with 1 method)

In [8]:
rbm = initializeRBM_rows(784, 225)

RBM{Float64}(n_vis=784, n_hid=225, trained=false)

### Train and RBM

In [11]:
function sigmoid(vector::Array{Float64})
    return 1./(1 + e.^(-vector))
end

sigmoid (generic function with 1 method)

# Thinking the data as rows (Julia stores it as columns)

In [12]:
function contrastive_divergence_rows_K(Xbatch, rbm, K::Int64, lr::Float64)
        
    batch_size = size(Xbatch)[1]

    Delta_W = zeros(rbm.W)
    Delta_b = zeros(rbm.vis_bias)
    Delta_c = zeros(rbm.hid_bias)

    for i in 1:batch_size
        x =  Xbatch[i:i,:]
        xneg = Xbatch[i:i,:]

        for k in 1:K
            hneg = sigmoid( xneg * rbm.W .+ rbm.hid_bias') .> rand(1,rbm.n_hid)
            xneg = sigmoid( hneg * rbm.W' .+ rbm.vis_bias') .> rand(1,rbm.n_vis)
        end

        ehp = sigmoid(x * rbm.W + rbm.hid_bias')
        ehn = sigmoid(xneg * rbm.W + rbm.hid_bias')
        
        Delta_W += lr * (kron(x, ehp') - kron(xneg, ehn'))'
        Delta_b += lr * (x - xneg)'
        Delta_c += lr * (ehp - ehn)'
    end
    
    rbm.W += Delta_W / batch_size;
    rbm.vis_bias += vec(Delta_b / batch_size);
    rbm.hid_bias += vec(Delta_c / batch_size);
    
end

contrastive_divergence_rows_K (generic function with 1 method)

In [13]:
X_batch_rows = X_train_rows[1:200,:];

In [None]:
@time contrastive_divergence_rows_K(X_batch_rows, rbm, 1, 0.01);

### split in 2 functions

In [15]:
function updates(x, xneg, rbm,  Delta_W, Delta_b, Delta_c, K, lr)

        for k in 1:K
            hneg = sigmoid( xneg * rbm.W .+ rbm.hid_bias') .> rand(1,rbm.n_hid)
            xneg = sigmoid( hneg * rbm.W' .+ rbm.vis_bias') .> rand(1,rbm.n_vis)
        end

        ehp = sigmoid(x * rbm.W + rbm.hid_bias')
        ehn = sigmoid(xneg * rbm.W + rbm.hid_bias')
        
        Delta_W += lr * x*ehp' - xneg* ehn')'
        Delta_b += lr * (x - xneg)'
        Delta_c += lr * (ehp - ehn)'
end

LoadError: syntax: extra token ")" after end of expression

In [16]:
function contrastive_divergence_rows_K_split(Xbatch, rbm, K::Int64, lr::Float64)
        
    batch_size = size(Xbatch)[1]

    Delta_W = zeros(rbm.W)
    Delta_b = zeros(rbm.vis_bias)
    Delta_c = zeros(rbm.hid_bias)

    for i in 1:batch_size
        x =  Xbatch[i:i,:]
        xneg = Xbatch[i:i,:]
        updates(x,xneg,rbm, Delta_W, Delta_b, Delta_c, K, lr)
    end
    
    rbm.W += Delta_W / batch_size;
    rbm.vis_bias += vec(Delta_b / batch_size);
    rbm.hid_bias += vec(Delta_c / batch_size);
    
end

contrastive_divergence_rows_K_split (generic function with 1 method)

In [17]:
@time contrastive_divergence_rows_K_split(X_batch_rows, rbm, 1, 0.01);

LoadError: UndefVarError: updates not defined

#### Generate indicies for the minibatches

In [45]:
function fit_CDK_rows(X, rbm, batch_size::Integer, n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[1]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]

    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            contrastive_divergence_rows_K(X[minibatch_ind, :], rbm, K, lr)
        end
        print("\nepoch ", epoch, "  time epoch:", toq())
        
    end
    rbm.trained = true
end



fit_CDK_rows (generic function with 1 method)

In [46]:
size(X_train_rows)

(42000,784)

In [48]:
# 175 seconds in julia
# 86.36 seconds in python
# System monitors shows python version uses several cores

n_epochs = 1
batch_size = 200
K = 1
lr = 0.01

fit_CDK_rows(X_train_rows, rbm, batch_size,  n_epochs, K, lr)


epoch 1  time epoch:186.911796912

true

# Thinking the data as columns


Now we will implement the same as cols



In [49]:
type RBM_col{T <: Real}
    W::Matrix{T}         
    vis_bias::Vector{T}     
    hid_bias::Vector{T}   
    n_vis::Int32
    n_hid::Int32
    trained::Bool
end

In [50]:
function Base.show{T}(io::IO, rbm::RBM_col{T})
    n_vis = size(rbm.vis_bias, 1)
    n_hid = size(rbm.hid_bias, 1)
    trained = rbm.trained
    print(io, "RBM{$T}(n_vis=$n_vis, n_hid=$n_hid, trained=$trained)")
end

In [51]:
function initialize_RBM_col(n_vis::Int64, n_hid::Int64; sigma=0.01, T=Float64)
    
    return RBM_col{T}( rand(Normal(0,sigma),(n_hid,n_vis)),  # weight matrix
                   zeros(n_vis),                             # visible vector  
                   zeros(n_hid),                             # Hidden vector
                   n_vis,                                    # num visible units 
                   n_hid,                                    # num hidden unnits
                   false)                                    # trained


end

initialize_RBM_col (generic function with 1 method)

In [52]:
function sigmoid(vector::Array{Float64})
    return 1./(1 + e.^(-vector))
end



sigmoid (generic function with 1 method)

In [53]:
rbm = initialize_RBM_col(784, 225)

RBM{Float64}(n_vis=784, n_hid=225, trained=false)

In [66]:
function contrastive_divergence_K(Xbatch, rbm, K::Int64, lr::Float64)
        
    batch_size = size(Xbatch)[2]

    Delta_W = zeros(size(rbm.W))
    Delta_b = zeros(size(rbm.vis_bias))
    Delta_c = zeros(size(rbm.hid_bias))

    for i in 1:batch_size
        x =  Xbatch[:,i]
        xneg = Xbatch[:,i]

        for k in 1:K
            hneg = sigmoid( rbm.W * xneg .+ rbm.hid_bias) .> rand(rbm.n_hid)
            xneg = sigmoid( rbm.W' * hneg .+ rbm.vis_bias) .> rand(rbm.n_vis)
        end

        ehp = sigmoid(rbm.W * x + rbm.hid_bias)
        ehn = sigmoid(rbm.W * xneg + rbm.hid_bias)
     
        Delta_W += lr * (kron(x, ehp') - kron(xneg, ehn'))'
        Delta_b += lr * (x - xneg)
        Delta_c += lr * (ehp - ehn)

    end

    rbm.W += Delta_W / batch_size;
    rbm.vis_bias += Delta_b / batch_size;
    rbm.hid_bias += Delta_c / batch_size;
    
    return 
end



contrastive_divergence_K (generic function with 1 method)

In [70]:
X_batch_cols = X_train_cols[:,1:200];

In [72]:
size(X_batch_cols)

(784,200)

In [76]:
@time contrastive_divergence_K(X_batch_cols, rbm, 1, 0.01)

  1.279093 seconds (21.22 k allocations: 1.608 GB, 3.56% gc time)


In [77]:
function fit_CDK_cols(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]

    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            contrastive_divergence_K(X[:, minibatch_ind], rbm, K, lr)
        end
        print("\nepoch ", epoch, "  time epoch:", toq())
    end
    rbm.trained = true
end

fit_CDK_cols (generic function with 1 method)

In [81]:
size(X_train_cols)

(784,42000)

In [None]:
n_epochs = 3
batch_size = 200
K = 1
lr = 0.01

@time fit_CDK_cols(X_train_cols, rbm, batch_size,  n_epochs, K, lr)