# RBM and CRBM

Objective: Implement CRBM in Julia for time series analysis

In [1]:
# Import Distributions to generate the W matrix of the RBM
using Distributions
using MNIST
#using BenchmarkTools
using Benchmarks

In [2]:
type RBM{T <: Real}
    n_vis::Int
    n_hid::Int
    W::Matrix{T}         
    vis_bias::Vector{T}     
    hid_bias::Vector{T}   
    trained::Bool
end

function Base.show{T}(io::IO, rbm::RBM{T})
    n_vis = size(rbm.vis_bias, 1)
    n_hid = size(rbm.hid_bias, 1)
    trained = rbm.trained
    print(io, "RBM{$T}(n_vis=$n_vis, n_hid=$n_hid, trained=$trained)")
end

In [3]:
function sigmoid(vector::Array{Float64})
    return 1./(1 + exp(-vector))
end

sigmoid (generic function with 1 method)

In [4]:
function initialize_RBM(n_vis, n_hid, sigma, T)
    
    return RBM{T}( n_vis,                                 # num visible units 
                   n_hid,                                 # num hidden unnits
                   rand(Normal(0,sigma), n_hid, n_vis),  # weight matrix
                   zeros(n_vis),                          # visible vector  
                   zeros(n_hid),                          # Hidden vector
                   false)                                 # trained
end

initialize_RBM (generic function with 1 method)

In [5]:
rbm = initialize_RBM(784, 100, 0.01, Float64)

RBM{Float64}(n_vis=784, n_hid=100, trained=false)

In [6]:
size(rbm.W)

(100,784)

In [7]:
X_train, y_train = MNIST.traindata()
X_test, y_test = MNIST.testdata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[7.0,2.0,1.0,0.0,4.0,1.0,4.0,9.0,5.0,9.0  …  7.0,8.0,9.0,0.0,1.0,2.0,3.0,4.0,5.0,6.0])

In [8]:
function contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
        
    batch_size = size(Xbatch)[2]
    Delta_W = zeros(size(rbm.W))
    Delta_b = zeros(size(rbm.vis_bias))
    Delta_c = zeros(size(rbm.hid_bias))
    
    xneg = zeros(size(rbm.vis_bias))
    hneg = similar(rbm.hid_bias)
    b1 = similar(rbm.W * Xbatch[:,1])
    b2 = similar(rbm.W' * hneg)
    ehp = similar(rbm.hid_bias)
    ehn = similar(rbm.hid_bias)
        
    @inbounds for i in 1:batch_size
        x =  @view Xbatch[:,i]
        xneg = @view Xbatch[:,i]

        for k in 1:K
            hneg .= sigmoid(rbm.W * xneg .+ rbm.hid_bias) .> rand.()
            At_mul_B!(b2, rbm.W, hneg)
            xneg .= sigmoid(b2 .+ rbm.vis_bias) .> rand.()         
        end

        A_mul_B!(b1, rbm.W, x)
        ehp .= sigmoid(b1 .+ rbm.hid_bias)
        A_mul_B!(b1, rbm.W, xneg)
        ehn .= sigmoid(b1 .+ rbm.hid_bias)

        Delta_W .+= lr .* (ehp .* x' .- ehn .* xneg')
        Delta_b .+= lr .* (x .- xneg)
        Delta_c .+= lr .* (ehp .- ehn)

    end

    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_b ./ batch_size;
    rbm.hid_bias .+= Delta_c ./ batch_size;

    return 
end

contrastive_divergence_K (generic function with 1 method)

In [9]:
X_batch = X_train[:,1:25]

@benchmark contrastive_divergence_K(X_batch, rbm, 1, 0.01)
#@time contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 40.97 ms [26.44 ms, 55.49 ms]
Proportion of time in GC: 6.99% [2.83%, 11.16%]
        Memory allocated: 78.83 mb
   Number of allocations: 3594 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 5.30 s


In [10]:
size(X_train), size(X_batch)

((784,60000),(784,25))

# Fit RBM

In [11]:
function fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    print("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            contrastive_divergence_K(Xbatch, rbm, K, lr)
            
        end
        print("\nepoch ", epoch, "  time epoch:", toq())
    end
    rbm.trained = true
end

fit_CDK (generic function with 1 method)

In [12]:
n_epochs = 1
batch_size = 200
K = 1
lr = 0.01

@time fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:300

epoch 1  time epoch:107.746640715108.065719 seconds (8.71 M allocations: 180.974 GB, 6.64% gc time)


true

# vectorized cdk

In [13]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    
    for k in 1:K
        Hneg = sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Xneg = sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
    end
       
    Ehp = sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn = sigmoid( rbm.W * Xneg .+ rbm.hid_bias)

    Delta_W = lr*( Ehp * Xbatch' -  Ehn *  Xneg')
    Delta_vis_bias = sum(lr .* (Xbatch .- Xneg), 2)[:]
    Delta_hid_bias = sum(lr .* (Ehp - Ehn), 2)[:]
    
    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_vis_bias ./ batch_size;
    rbm.hid_bias .+= Delta_hid_bias ./ batch_size;
    
end

vec_contrastive_divergence_K (generic function with 1 method)

In [14]:
X_batch = X_train[:,1:25]
@benchmark vec_contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 21.38 ms [18.65 ms, 24.10 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 5.36 mb
   Number of allocations: 173 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 4.74 s


In [15]:
function vec_fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    println("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            vec_contrastive_divergence_K(Xbatch, rbm, K, lr)
        end
        print("\n\nepoch ", epoch, "  time epoch:", toq(), "\n")
    end
    rbm.trained = true
end

vec_fit_CDK (generic function with 1 method)

In [16]:
n_epochs = 1
batch_size = 1000
K = 1
lr = 0.01

@time vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:60



epoch 1  time epoch:31.606272515
 33.093533 seconds (2.10 M allocations: 4.316 GB, 1.17% gc time)


## VERY IMPORTANT: DEFINE VARIABLES AT THE BEGINING

In [17]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    local Hneg::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Xneg::Array{Float64} = zeros(rbm.n_vis, batch_size)
    
    for k in 1:K
        Hneg .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Xneg .= sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
    end
       
    Ehp = sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn = sigmoid(rbm.W * Xneg .+ rbm.hid_bias)

    Delta_W = lr*( Ehp * Xbatch' -  Ehn *  Xneg')
    Delta_vis_bias = sum(lr .* (Xbatch .- Xneg), 2)[:]
    Delta_hid_bias = sum(lr .* (Ehp - Ehn), 2)[:]
    
    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_vis_bias ./ batch_size;
    rbm.hid_bias .+= Delta_hid_bias ./ batch_size;
    
end



vec_contrastive_divergence_K (generic function with 1 method)

In [18]:
X_batch = X_train[:,1:25]
@benchmark vec_contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 4.58 ms [3.55 ms, 5.60 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 5.53 mb
   Number of allocations: 175 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 0.93 s


In [19]:
function vec_fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    println("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            vec_contrastive_divergence_K(Xbatch, rbm, K, lr)
        end
        print("\n\nepoch ", epoch, "  time epoch:", toq(), "\n")
    end
    rbm.trained = true
end



vec_fit_CDK (generic function with 1 method)

In [61]:
n_epochs = 1
batch_size = 1000
K = 1
lr = 0.01

@time vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:60



epoch 1  time epoch:8.467765761
  8.469279 seconds (49.26 k allocations: 4.664 GB, 39.08% gc time)


true

### Define space for all the arrays

In [62]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    local Hneg::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Xneg::Array{Float64} = zeros(rbm.n_vis, batch_size)
    local Ehp::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Ehn::Array{Float64} = zeros(rbm.n_hid, batch_size)
    
    for k in 1:K
        Hneg .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Xneg .= sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
    end
       
    Ehp .= sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias)
   
    rbm.W .+= lr*( Ehp * Xbatch' -  Ehn *  Xneg') ./ batch_size;
    rbm.vis_bias .+= sum(lr .* (Xbatch .- Xneg), 2)[:]./ batch_size;
    rbm.hid_bias .+= sum(lr .* (Ehp - Ehn), 2)[:] ./ batch_size;
    
end



vec_contrastive_divergence_K (generic function with 1 method)

In [63]:
X_batch = X_train[:,1:25]
@benchmark vec_contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 6.60 ms [2.71 ms, 10.50 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 5.57 mb
   Number of allocations: 179 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 1.43 s


In [64]:
n_epochs = 1
batch_size = 1000
K = 1
lr = 0.01

@time vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:60



epoch 1  time epoch:7.035202554
  7.049922 seconds (49.49 k allocations: 4.664 GB, 21.55% gc time)


true

## Use the BLAS

In [270]:
?BLAS.gemm

```
gemm(tA, tB, alpha, A, B)
```

Returns `alpha*A*B` or the other three variants according to `tA` (transpose `A`) and `tB`.

```
gemm(tA, tB, A, B)
```

Returns `A*B` or the other three variants according to `tA` (transpose `A`) and `tB`.


In [259]:
?BLAS.gemm!

```
gemm!(tA, tB, alpha, A, B, beta, C)
```

Update `C` as `alpha*A*B + beta*C` or the other three variants according to `tA` (transpose `A`) and `tB`. Returns the updated `C`.


In [265]:
?BLAS.gemv!

```
gemv!(tA, alpha, A, x, beta, y)
```

Update the vector `y` as `alpha*A*x + beta*y` or `alpha*A'x + beta*y` according to `tA` (transpose `A`). Returns the updated `y`.


In [344]:
?BLAS.ger!

```
ger!(alpha, x, y, A)
```

Rank-1 update of the matrix `A` with vectors `x` and `y` as `alpha*x*y' + A`.


### The blas has understandable names in Julia

In [297]:
?A_mul_Bt

search: [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mt[22m [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mt[22m! [1mA[22mt[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mt[22m [1mA[22mt[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mt[22m! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mc [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mc!



```
A_mul_Bt(A, B)
```

For matrices or vectors $A$ and $B$, calculates $A⋅Bᵀ$.


In [299]:
?At_mul_B

search: [1mA[22m[1mt[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m [1mA[22m[1mt[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt [1mA[22m[1mt[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m! [1mA[22m[1mt[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt!



```
At_mul_B(A, B)
```

For matrices or vectors $A$ and $B$, calculates $Aᵀ⋅B$.


In [350]:
?A_mul_B

search: [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mc [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mc! [1mA[22mt[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m [1mA[22mc[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m

Couldn't find [36mA_mul_B
[39mPerhaps you meant A_mul_B!, A_mul_Bc, A_mul_Bt, Ac_mul_B, At_mul_B or A_mul_Bc!


No documentation found.

Binding `A_mul_B` does not exist.


#### Example

In [341]:
(rbm.W' * Hneg)[1:3]

3-element Array{Float64,1}:
 -0.0766849
 -0.0577609
 -0.106906 

In [342]:
BLAS.gemm('T','N', Float64(1.0), rbm.W, Hneg)[1:3]

3-element Array{Float64,1}:
 -0.0766849
 -0.0577609
 -0.106906 

In [343]:
At_mul_B(rbm.W, Hneg)[1:3]

3-element Array{Float64,1}:
 -0.0766849
 -0.0577609
 -0.106906 

In [73]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    local Hneg::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Xneg::Array{Float64} = zeros(rbm.n_vis, batch_size)
    local Ehp::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Ehn::Array{Float64} = zeros(rbm.n_hid, batch_size)
    
    for k in 1:K
        #Hneg .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Hneg .= sigmoid( rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        #Xneg .= sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
        Xneg .= sigmoid(At_mul_B(rbm.W, Hneg) .+ rbm.vis_bias) .> rand()
    end
       
    Ehp .= sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias)
   
    #rbm.W .+= lr*( Ehp * Xbatch' -  Ehn *  Xneg') ./ batch_size;
    rbm.W .+= lr*(A_mul_Bt(Ehp, Xbatch) .- A_mul_Bt(Ehn, Xneg)) ./ batch_size;
    rbm.vis_bias .+= sum(lr .* (Xbatch .- Xneg), 2)[:]./ batch_size;
    rbm.hid_bias .+= sum(lr .* (Ehp - Ehn), 2)[:] ./ batch_size;
    
end



vec_contrastive_divergence_K (generic function with 2 methods)

In [78]:
n_epochs = 1
batch_size = 400
K = 1
lr = 0.01

@time vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:150



epoch 1  time epoch:5.305408089
  5.307262 seconds (63.63 k allocations: 4.983 GB, 5.74% gc time)


true

### Allocating memory inside

In [67]:
function vec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real, Hneg, Xneg, Ehp,Ehn  )
    
    Xneg = Xbatch

    for k in 1:K
        #Hneg .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Hneg .= sigmoid( rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        #Xneg .= sigmoid(rbm.W' * Hneg  .+ rbm.vis_bias) .> rand()
        Xneg .= sigmoid(At_mul_B(rbm.W, Hneg) .+ rbm.vis_bias) .> rand()
    end
       
    Ehp .= sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias)
   
    #rbm.W .+= lr*( Ehp * Xbatch' -  Ehn *  Xneg') ./ batch_size;
    rbm.W .+= lr*(A_mul_Bt(Ehp, Xbatch) .- A_mul_Bt(Ehn, Xneg)) ./ batch_size;
    rbm.vis_bias .+= sum(lr .* (Xbatch .- Xneg), 2)[:]./ batch_size;
    rbm.hid_bias .+= sum(lr .* (Ehp - Ehn), 2)[:] ./ batch_size;
    
end

vec_contrastive_divergence_K (generic function with 2 methods)

In [68]:
function mem_vec_fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    println("number minibatches:", length(indicies), "\n")
    
    batch_size = length(indicies[1] )
    local Hneg::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Xneg::Array{Float64} = zeros(rbm.n_vis, batch_size)
    local Ehp::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Ehn::Array{Float64} = zeros(rbm.n_hid, batch_size)
    
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Hneg .= zero(Hneg)
            Xneg .= zero(Xneg)
            Ehp .= zero(Ehp)
            Ehn .= zero(Ehn)
            
            vec_contrastive_divergence_K(X[:, minibatch_ind], rbm, K, lr, Hneg, Xneg, Ehp,Ehn  )
        end
        print("\n\nepoch ", epoch, "  time epoch:", toq(), "\n")
    end
    rbm.trained = true
end

mem_vec_fit_CDK (generic function with 1 method)

In [69]:
n_epochs = 1
batch_size = 1000
K = 1
lr = 0.01

@benchmark mem_vec_fit_CDK(X_train, rbm, batch_size,  n_epochs, K, lr)

number minibatches:60



epoch 1  time epoch:8.194700379


     Time per evaluation: 8.24 s
Proportion of time in GC: 39.90%
        Memory allocated: 4.76 gb
   Number of allocations: 101515 allocations
       Number of samples: 1
   Number of evaluations: 1
 Time spent benchmarking: 8.61 s


In [398]:
?zeros

search: [1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m sp[1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m non[1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m drop[1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m drop[1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m! count_[1mz[22m[1me[22m[1mr[22m[1mo[22m[1ms[22m



```
zeros(type, dims)
```

Create an array of all zeros of specified type. The type defaults to Float64 if not specified.

```
zeros(A)
```

Create an array of all zeros with the same element type and shape as `A`.



# Optimized vectorial

Use BLAS directly to make the "transposes"

- https://discourse.julialang.org/t/blas-performance-issues-for-common-neural-network-patterns/565

In [40]:
?BLAS.gemv

```
gemv(tA, alpha, A, x)
```

Returns `alpha*A*x` or `alpha*A'x` according to `tA` (transpose `A`).

```
gemv(tA, A, x)
```

Returns `A*x` or `A'x` according to `tA` (transpose `A`).


In [57]:
?BLAS.gemv!

```
gemv!(tA, alpha, A, x, beta, y)
```

Update the vector `y` as `alpha*A*x + beta*y` or `alpha*A'x + beta*y` according to `tA` (transpose `A`). Returns the updated `y`.


In [73]:
Xbatch = X_train[:,1:25]
Xneg = copy(Xbatch)
Hneg = sigmoid(rbm.W * Xneg .+ rbm.hid_bias);

In [86]:
?BLAS.gemm

```
gemm(tA, tB, alpha, A, B)
```

Returns `alpha*A*B` or the other three variants according to `tA` (transpose `A`) and `tB`.

```
gemm(tA, tB, A, B)
```

Returns `A*B` or the other three variants according to `tA` (transpose `A`) and `tB`.


In [85]:
(rbm.W' * Hneg)[1:3]

3-element Array{Float64,1}:
 -0.0100554 
 -0.00985561
 -0.0187645 

In [96]:
BLAS.gemm('T','N', Float64(1.0), rbm.W, Hneg)[1:3]

3-element Array{Float64,1}:
 -0.0100554 
 -0.00985561
 -0.0187645 

In [154]:
@benchmark rbm.W' * Hneg

     Time per evaluation: 190.88 μs [167.04 μs, 214.72 μs]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 153.23 kb
   Number of allocations: 3 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 0.52 s


In [170]:
@benchmark BLAS.gemm('T','N', Float64(1.0), rbm.W, Hneg)

     Time per evaluation: 577.60 μs [0.00 ns, 1.56 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 153.23 kb
   Number of allocations: 3 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 0.62 s


In [186]:
[3,4,5] .> rand()

3-element BitArray{1}:
 true
 true
 true

In [187]:
rand()

0.3635189487245134

In [209]:
T = Float32
function optvec_contrastive_divergence_K(Xbatch, rbm, K::Integer, lr::Real)
    
    Xneg = copy(Xbatch)
    batch_size = size(Xbatch)[2]
    
    # I put the line below here because then 
    # Hneg = sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand() 
    # is cast as an Array{float64} and then I can use the BLAS 
    # without errors
    local Hneg::Array{Float64} = zeros(rbm.n_hid, batch_size)
    local Xneg::Array{Float64} = zeros(rbm.n_vis, batch_size)

    for k in 1:K
        Hneg .= sigmoid(rbm.W * Xneg .+ rbm.hid_bias) .> rand()
        Xneg .= sigmoid(BLAS.gemm('T','N', Float64(1.0), rbm.W, Hneg)  .+ rbm.vis_bias) .> rand()
    end
       
    Ehp = sigmoid(rbm.W * Xbatch .+ rbm.hid_bias)
    Ehn = sigmoid(rbm.W * Xneg .+ rbm.hid_bias)

    Delta_W = lr*( Ehp * Xbatch' -  Ehn *  Xneg')
    Delta_vis_bias = sum(lr .* (Xbatch .- Xneg), 2)[:]
    Delta_hid_bias = sum(lr .* (Ehp - Ehn), 2)[:]
    
    rbm.W .+= Delta_W ./ batch_size;
    rbm.vis_bias .+= Delta_vis_bias ./ batch_size;
    rbm.hid_bias .+= Delta_hid_bias ./ batch_size;
    
end



optvec_contrastive_divergence_K (generic function with 1 method)

In [211]:
X_batch = X_train[:,1:25]
@benchmark optvec_contrastive_divergence_K(X_batch, rbm, 1, 0.01)

     Time per evaluation: 7.49 ms [4.17 ms, 10.80 ms]
Proportion of time in GC: 0.00% [0.00%, 0.00%]
        Memory allocated: 5.53 mb
   Number of allocations: 175 allocations
       Number of samples: 100
   Number of evaluations: 100
 Time spent benchmarking: 1.33 s


In [None]:
function vec_fit_CDK(X, rbm, batch_size::Integer,  n_epochs::Integer, K::Integer, lr::Real)
        
    n_samples = size(X)[2]
    indicies = [x:min(x + batch_size-1, n_samples) for x in 1:batch_size:n_samples]
    mb = 1
    println("number minibatches:", length(indicies), "\n")
    for epoch in 1:n_epochs
        tic();
        for minibatch_ind in indicies
            Xbatch = @view X[:, minibatch_ind]
            vec_contrastive_divergence_K(Xbatch, rbm, K, lr)
        end
        print("\n\nepoch ", epoch, "  time epoch:", toq(), "\n")
    end
    rbm.trained = true
end

In [30]:
expand(:(Delta_W .+= lr * ( x * ehp' - xneg * ehn')'))

:((Base.broadcast!)(Base.identity,Delta_W,Delta_W .+ A_mul_Bc(lr,A_mul_Bc(x,ehp) - A_mul_Bc(xneg,ehn))))

In [34]:
?A_mul_Bc

search: [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mc[22m [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mc[22m! [1mA[22mc[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mc[22m [1mA[22mc[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m[1mc[22m! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22m! [1mA[22m[1m_[22m[1mm[22m[1mu[22m[1ml[22m[1m_[22m[1mB[22mt!



```
A_mul_Bc(A, B)
```

For matrices or vectors $A$ and $B$, calculates $A⋅Bᴴ$.
