## Toy neural networks for educational purposes in Julia

An interesting discussion on how to implement the forward pass efficiently using BLAS:

- https://discourse.julialang.org/t/blas-performance-issues-for-common-neural-network-patterns/565

In [324]:
using MNIST

In [325]:
# Load the MNIST training set. As the output below shows, the result is a
# 2-tuple: a data matrix followed by a vector of labels.
train = MNIST.traindata()

(
[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0],

[5.0,0.0,4.0,1.0,9.0,2.0,1.0,3.0,1.0,4.0  …  9.0,2.0,9.0,5.0,1.0,8.0,3.0,5.0,6.0,8.0])

In [326]:
# Unpack the (data, labels) tuple; the trailing `;` suppresses notebook output.
X_train, y_train = train;

In [327]:
# Distinct label values in ascending order. `unique` returns a fresh vector,
# so sorting it in place leaves `train` untouched.
sort!(unique(train[2]))

10-element Array{Float64,1}:
 0.0
 1.0
 2.0
 3.0
 4.0
 5.0
 6.0
 7.0
 8.0
 9.0

## Defining the linear layer and the ReLU layer

In [328]:
# Element type and sizes of the first weight matrix.
T = Float32
n_visible, n_hidden = 784, 500

# Seed the global RNG before drawing the weights so the draw is repeatable.
srand(1234)
W1 = rand(T, n_hidden, n_visible);

In [329]:
# Sanity check: W1 was created as n_hidden × n_visible.
size(W1)

(500,784)

In [330]:
# Shape of a mini-batch made of the first ten columns of X_train.
# The slice allocates a copy; it is only used here for inspection.
size(X_train[:,1:10])

(784,10)

In [331]:
# Forward product for the first ten columns. W1 is Float32 (T) but the result
# shown below is Float64, i.e. the multiplication is promoted to the wider
# element type.
# NOTE(review): mixed element types presumably bypass the BLAS gemm fast path;
# converting the batch to T first would keep everything in Float32 — confirm.
W1 * X_train[:,1:10]

500×10 Array{Float64,2}:
 13027.4  14847.2  10032.1   8841.98  …  9508.38  17518.3  5716.2   11275.0 
 14086.0  16402.7   9930.5   9091.55     8630.12  17810.7  5409.04  11826.5 
 12056.6  15166.0   8827.64  8437.43     8358.53  16801.2  4946.36  10161.1 
 14100.1  15136.6   9994.82  7975.77     9229.68  17994.4  5289.07  10823.4 
 13865.6  15790.8   9572.64  8406.93     8977.36  18329.5  5107.01  10836.5 
 13293.7  15751.5   9085.16  8706.94  …  8279.63  16631.5  5301.57   9686.37
 13012.2  14699.1   8958.21  7850.34     8374.82  17603.9  5581.95   9880.36
 12612.6  15642.7   9316.4   8956.66     8878.85  16938.1  5414.17  10578.8 
 13888.1  15245.1   9318.4   8850.87     8042.99  18003.9  5010.2   10644.4 
 12934.6  15676.7  10495.0   8267.86     8636.57  18339.1  5117.54  11485.0 
 14082.3  15518.7   9735.68  9012.78  …  9074.54  18323.8  5688.01  11034.4 
 14101.5  16376.5   9696.03  8447.71     9571.04  18369.6  6213.38  10135.2 
 13291.9  15373.1  10304.7   8296.81     8532.4   1

#### Defining layers

In [259]:
# Notebook convenience: start from a fresh Main so the type below can be
# redefined when this cell is re-run. NOTE: this also discards every binding
# created by earlier cells.
workspace()

"""
    LinearLayer{T}(input, output; seed=1234)

Fully connected layer holding an `input × output` weight matrix `W` and a bias
vector `b` of length `output`, both with element type `T`. The output of the
layer for a given input is meant to be the matrix product of the input times
`W`.

The weights are drawn with `rand` and scaled by `1 / sqrt(input)`; the bias
starts at zero. `seed` is used to seed the global RNG before the weights are
drawn and is stored in the layer so the initialisation can be reproduced.
"""
type LinearLayer{T}
    input_dim::Int
    output_dim::Int
    W::Matrix{T}
    b::Vector{T}
    seed::Int

    function LinearLayer(input, output; seed=1234)
        srand(seed)
        # Scale in the layer's own element type to avoid a mixed-precision divide.
        scale = T(sqrt(input))
        W = rand(T, input, output) / scale
        b = zeros(T, output)
        # Pass every field, including `seed`: trailing fields omitted from
        # `new` are left uninitialised.
        return new(input, output, W, b, seed)
    end
end

In [260]:
# Build a 784 → 500 Float32 layer; the trailing `;` suppresses notebook output.
input_dim, output_dim = 784, 500
l = LinearLayer{Float32}(input_dim, output_dim);

In [266]:
# Notebook convenience: start from a fresh Main so the type below can be
# redefined when this cell is re-run. NOTE: this also discards every binding
# created by earlier cells.
workspace()

"""
    ReluActivation{T}(dim)

ReLU activation function layer of width `dim`.

The type parameter `T` is not used by any field; presumably it is meant to be
the element type of the activations (to be confirmed once a forward pass
exists).
"""
type ReluActivation{T}
    dim::Int
end

#### Softmax layer

http://stats.stackexchange.com/questions/79454/softmax-layer-in-a-neural-network

In [265]:
# Notebook convenience: start from a fresh Main so the type below can be
# redefined when this cell is re-run. NOTE: this also discards every binding
# created by earlier cells.
workspace()

"""
    SoftMaxLayer{T}(input, output; seed=1234)

Weights for a softmax output layer: an `input × output` matrix `W` with element
type `T`. Unlike `LinearLayer`, this type has no bias field.

The weights are drawn with `rand` and scaled by `1 / sqrt(input)`. `seed` is
used to seed the global RNG before the weights are drawn and is stored in the
layer so the initialisation can be reproduced.
"""
type SoftMaxLayer{T}
    input_dim::Int
    output_dim::Int
    W::Matrix{T}
    seed::Int

    function SoftMaxLayer(input, output; seed=1234)
        srand(seed)
        # Scale in the layer's own element type to avoid a mixed-precision divide.
        scale = T(sqrt(input))
        W = rand(T, input, output) / scale
        # Pass every field, including `seed`: trailing fields omitted from
        # `new` are left uninitialised.
        return new(input, output, W, seed)
    end
end

In [None]:
type linear_layer

In [None]:

# Globals used by `mockNN`: weights, gradient accumulators and activation
# buffers for a 512*512 → 2048 → 1024 → 10 chain of matrix-vector products.
T = Float32

# Weights are stored as (outputs × inputs). Each layer's input width is taken
# from the previous layer's output width so the shapes cannot drift apart.
# W1 alone is 2048 * 512 * 512 * 4 bytes = 2 GiB, and dW1 is the same size again.
W1 = rand(T, 2048, 512 * 512)
W2 = rand(T, 1024, size(W1, 1))
W3 = rand(T, 10, size(W2, 1))

# Zero-initialised gradient accumulators with the same shape and element type
# as the corresponding weights.
dW1 = fill!(similar(W1), 0)
dW2 = fill!(similar(W2), 0)
dW3 = fill!(similar(W3), 0)

# Forward activations, one buffer per layer output.
out1, out2, out3 = zeros(T, size(W1, 1)), zeros(T, size(W2, 1)), zeros(T, size(W3, 1))

# Backward buffers: gradients w.r.t. out1, out2 and the network input.
dOut1, dOut2, dOut = zeros(T, size(W1, 1)), zeros(T, size(W2, 1)), zeros(T, size(W1, 2))

"""
    mockNN(input, error)

Run one forward and one backward sweep through the three global weight
matrices `W1`, `W2`, `W3` using in-place BLAS calls. No activation function is
applied between layers.

`input` is fed through the forward pass into the global buffers `out1`, `out2`
and `out3`. `error` is used as the gradient at the output: it is propagated
back into `dOut2`, `dOut1` and `dOut`, and the weight gradients are accumulated
into `dW3`, `dW2` and `dW1`. The backward products are written with an explicit
`W'` and the `'N'` flag.

Returns the result of the last `BLAS.ger!` call.
"""
function mockNN(input::Array{Float32, 1}, error::Array{Float32, 1})
    alpha, beta = T(1.0), T(0.0)

    # Forward: gemv!('N', alpha, A, x, beta, y) stores alpha*A*x + beta*y in y.
    BLAS.gemv!('N', alpha, W1, input, beta, out1)
    BLAS.gemv!('N', alpha, W2, out1, beta, out2)
    BLAS.gemv!('N', alpha, W3, out2, beta, out3)

    # Backward: ∂E/∂inputs and ∂E/∂W for each layer, last layer first.
    # ger!(alpha, x, y, A) adds alpha*x*y' to A, so each dW is zeroed first.
    fill!(dW3, 0)
    fill!(dOut2, 0)
    BLAS.gemv!('N', alpha, W3', error, beta, dOut2)
    BLAS.ger!(alpha, error, out2, dW3)

    fill!(dW2, 0)
    fill!(dOut1, 0)
    BLAS.gemv!('N', alpha, W2', dOut2, beta, dOut1)
    BLAS.ger!(alpha, dOut2, out1, dW2)

    fill!(dW1, 0)
    fill!(dOut, 0)
    BLAS.gemv!('N', alpha, W1', dOut1, beta, dOut)
    return BLAS.ger!(alpha, dOut1, input, dW1)
end


In [None]:

# First timed call, kept separate from the loop: it includes JIT compilation.
input, error = rand(T, 512 * 512), rand(T, 10)
@time mockNN(input, error)

# Ten more timed runs, each on a fresh random input and output-error vector.
for run = 1:10
    input, error = rand(T, 512 * 512), rand(T, 10)
    @time mockNN(input, error)
end



In [None]:
# Globals used by `mockNN2`: weights, gradient accumulators and activation
# buffers for a 512*512 → 2048 → 1024 → 10 chain of matrix-vector products.
T = Float32

# Weights, stored as (outputs × inputs).
W1 = rand(T, 2048, 512 * 512)
W2 = rand(T, 1024, 2048)
W3 = rand(T, 10, 1024)

# Zero-initialised gradient accumulators, one per weight matrix.
dW1 = zeros(W1)
dW2 = zeros(W2)
dW3 = zeros(W3)

# Forward activations, one buffer per layer output.
out1, out2, out3 = zeros(T, 2048), zeros(T, 1024), zeros(T, 10)

# Backward buffers: gradients w.r.t. out1, out2 and the network input.
dOut1, dOut2, dOut = zeros(T, 2048), zeros(T, 1024), zeros(T, 512 * 512)

"""
    mockNN2(input, error)

Run one forward and one backward sweep through the three global weight
matrices `W1`, `W2`, `W3` using in-place BLAS calls. No activation function is
applied between layers.

`input` is fed through the forward pass into the global buffers `out1`, `out2`
and `out3`. `error` is used as the gradient at the output: it is propagated
back into `dOut2`, `dOut1` and `dOut`, and the weight gradients are accumulated
into `dW3`, `dW2` and `dW1`. The backward products pass the `'T'` flag to
`gemv!` instead of building `W'` explicitly.

Returns the result of the last `BLAS.ger!` call.
"""
function mockNN2(input::Array{Float32, 1}, error::Array{Float32, 1})
    alpha, beta = T(1.0), T(0.0)

    # Forward: gemv!('N', alpha, A, x, beta, y) stores alpha*A*x + beta*y in y.
    BLAS.gemv!('N', alpha, W1, input, beta, out1)
    BLAS.gemv!('N', alpha, W2, out1, beta, out2)
    BLAS.gemv!('N', alpha, W3, out2, beta, out3)

    # Backward: ∂E/∂inputs and ∂E/∂W for each layer, last layer first.
    # ger!(alpha, x, y, A) adds alpha*x*y' to A, so each dW is zeroed first.
    fill!(dW3, 0)
    fill!(dOut2, 0)
    BLAS.gemv!('T', alpha, W3, error, beta, dOut2)
    BLAS.ger!(alpha, error, out2, dW3)

    fill!(dW2, 0)
    fill!(dOut1, 0)
    BLAS.gemv!('T', alpha, W2, dOut2, beta, dOut1)
    BLAS.ger!(alpha, dOut2, out1, dW2)

    fill!(dW1, 0)
    fill!(dOut, 0)
    BLAS.gemv!('T', alpha, W1, dOut1, beta, dOut)
    return BLAS.ger!(alpha, dOut1, input, dW1)
end

In [None]:
# Single timed call of the original `mockNN`.
# NOTE(review): this may be an intended baseline or a copy-paste slip. If a
# warm-up of `mockNN2` was meant, the first loop timing below includes its
# JIT compilation — confirm.
input, error = rand(T, 512 * 512), rand(T, 10)
@time mockNN(input, error)

# Ten timed runs of `mockNN2`, each on a fresh random input and error vector.
for run = 1:10
    input, error = rand(T, 512 * 512), rand(T, 10)
    @time mockNN2(input, error)
end
