In [None]:
using LinearAlgebra
using Random
using MLDatasets
using ImageView, Images

Define constants

In [None]:
# Training hyperparameters.
# ⍺      : step size handed to `gradient_descent` (presumably the learning rate)
# epochs : number of iterations `gradient_descent` loops over
⍺, epochs = 0.01, 30

Define initial weights and biases

In [None]:
# Every parameter is drawn uniformly from [-1, 1) as `2 * rand - 1`.
# The generators below are consumed left to right, so the random draws happen in
# the same order as the variables are listed.

# Conv Layer 1: two 5x5 kernels, each with its own scalar bias
W_1_1, W_1_2 = (2.0 * rand(5, 5) .- 1.0 for _ in 1:2)
b_1_1, b_1_2 = (2.0 * rand() - 1.0 for _ in 1:2)

# Conv Layer 2: eight 3x3 kernels and four scalar biases
W_2_1, W_2_2, W_2_3, W_2_4, W_2_5, W_2_6, W_2_7, W_2_8 =
    (2.0 * rand(3, 3) .- 1.0 for _ in 1:8)
b_2_1, b_2_2, b_2_3, b_2_4 = (2.0 * rand() - 1.0 for _ in 1:4)

# Fully Connected Layer: 10x100 weight matrix and a 1x10 bias row
# (100 inputs presumably corresponds to four flattened 5x5 feature maps — confirm)
W = 2.0 * rand(10, 100) .- 1.0
b = 2.0 * rand(1, 10) .- 1.0

Data pre-processing

In [None]:
# Load the MNIST images and labels through MLDatasets.
#
# `MNIST.traintensor()` / `MNIST.testtensor()` default to a fixed-point element type
# whose values are already scaled to [0, 1]. Converting to Float64 is therefore all
# that is needed; dividing by 255 on top of that would squash every pixel into
# [0, 1/255].
train_data = Float64.(MNIST.traintensor())
test_data = Float64.(MNIST.testtensor())
train_labels = MNIST.trainlabels()
test_labels = MNIST.testlabels()

Defining helper functions 

In [None]:
"""
    convolve(matrix, kernel, stride, matrix_n, matrix_m, kernel_n, kernel_m, bias)

Slide `kernel` over `matrix` and return the matrix of window responses.

Each output entry is `dot(window, kernel) + bias`, where `window` is the
`kernel_n × kernel_m` patch of `matrix` at that position. The kernel is applied as
given (it is not flipped).

# Arguments
- `matrix`: input matrix.
- `kernel`: weights, expected to hold `kernel_n * kernel_m` entries.
- `stride`: step between consecutive window positions, in both directions.
- `matrix_n`, `matrix_m`: number of rows / columns of `matrix` to cover.
- `kernel_n`, `kernel_m`: number of rows / columns of each window.
- `bias`: scalar added to every response.

# Returns
A `Float64` matrix with one entry per window position, i.e.
`length(kernel_n:stride:matrix_n) × length(kernel_m:stride:matrix_m)`.
"""
function convolve(matrix, kernel, stride, matrix_n, matrix_m, kernel_n, kernel_m, bias)
    # Each range lists the LAST row / column of every window position. Sizing the
    # output from these ranges keeps it correct for any stride and for non-square
    # inputs or kernels.
    window_rows = kernel_n:stride:matrix_n
    window_cols = kernel_m:stride:matrix_m
    convolved_matrix = zeros(Float64, length(window_rows), length(window_cols))
    for (out_col, column) in enumerate(window_cols)
        for (out_row, row) in enumerate(window_rows)
            # A view avoids copying the patch for every output element.
            sub_matrix = view(matrix, (row - kernel_n + 1):row, (column - kernel_m + 1):column)
            convolved_matrix[out_row, out_col] = dot(sub_matrix, kernel) + bias
        end
    end
    return convolved_matrix
end
  

In [None]:
"""
    convolve_2_kernels(matrix_1, matrix_2, kernel_1, kernel_2, stride,
                       matrix_n, matrix_m, kernel_n, kernel_m, bias)

Slide `kernel_1` over `matrix_1` and `kernel_2` over `matrix_2` in lockstep and
return the summed responses.

Each output entry is `dot(window_1, kernel_1) + dot(window_2, kernel_2) + bias`,
where both windows are taken at the same position. The kernels are applied as given
(they are not flipped).

# Arguments
- `matrix_1`, `matrix_2`: input matrices, indexed with the same window ranges.
- `kernel_1`, `kernel_2`: weights applied to `matrix_1` and `matrix_2` respectively.
- `stride`: step between consecutive window positions, in both directions.
- `matrix_n`, `matrix_m`: number of rows / columns of the inputs to cover.
- `kernel_n`, `kernel_m`: number of rows / columns of each window.
- `bias`: scalar added once to every summed response.

# Returns
A `Float64` matrix with one entry per window position, i.e.
`length(kernel_n:stride:matrix_n) × length(kernel_m:stride:matrix_m)`.
"""
function convolve_2_kernels(matrix_1, matrix_2, kernel_1, kernel_2, stride, matrix_n, matrix_m, kernel_n, kernel_m, bias)
    # Each range lists the LAST row / column of every window position. Sizing the
    # output from these ranges keeps it correct for any stride and for non-square
    # inputs or kernels.
    window_rows = kernel_n:stride:matrix_n
    window_cols = kernel_m:stride:matrix_m
    convolved_matrix = zeros(Float64, length(window_rows), length(window_cols))
    for (out_col, column) in enumerate(window_cols)
        for (out_row, row) in enumerate(window_rows)
            row_span = (row - kernel_n + 1):row
            col_span = (column - kernel_m + 1):column
            # Views avoid copying both patches for every output element.
            sub_matrix_1 = view(matrix_1, row_span, col_span)
            sub_matrix_2 = view(matrix_2, row_span, col_span)
            convolved_matrix[out_row, out_col] =
                dot(sub_matrix_1, kernel_1) + dot(sub_matrix_2, kernel_2) + bias
        end
    end
    return convolved_matrix
end

In [None]:
"""
    max_pooling(matrix, pool_size, stride)

Return the maximum of every `pool_size × pool_size` window of `matrix`, with
windows placed `stride` apart in both directions.

# Arguments
- `matrix`: input matrix.
- `pool_size`: side length of each square window.
- `stride`: step between consecutive window positions.

# Returns
A `Float64` matrix with one entry per window position. Trailing rows / columns that
do not fill a complete window are ignored; an input smaller than one window yields
an empty result.
"""
function max_pooling(matrix, pool_size, stride)
    # Each range lists the LAST row / column of every window. Sizing the output from
    # these ranges (rather than `dims ÷ pool_size`) keeps it in step with the loops
    # when `stride != pool_size` and when the input is not square.
    window_rows = pool_size:stride:size(matrix, 1)
    window_cols = pool_size:stride:size(matrix, 2)
    output_matrix = zeros(Float64, length(window_rows), length(window_cols))
    for (out_col, column) in enumerate(window_cols)
        for (out_row, row) in enumerate(window_rows)
            window = view(matrix, (row - pool_size + 1):row, (column - pool_size + 1):column)
            output_matrix[out_row, out_col] = maximum(window)
        end
    end
    return output_matrix
end

In [None]:
"""
    print_matrix(matrix)

Write `matrix` to standard output, one row per line. Every row is wrapped in square
brackets and every entry is followed by `", "` (including the last one).
"""
function print_matrix(matrix)
    row_count, column_count = size(matrix, 1), size(matrix, 2)
    for i in 1:row_count
        cells = (string(matrix[i, j], ", ") for j in 1:column_count)
        println("[", join(cells), "]")
    end
end

Defining activation functions

In [None]:
"""
    ReLU(matrix)

Rectified linear unit: replace every entry of `matrix` that is below zero with zero,
leaving the other entries unchanged.
"""
function ReLU(matrix)
    return broadcast(max, matrix, 0)
end

In [None]:
"""
    softmax(matrix)

Return `exp.(matrix)` normalised so that the entries of the result sum to one. The
normalisation runs over ALL entries of `matrix`, not per row or per column.

The maximum entry is subtracted before exponentiating. This leaves the result
mathematically unchanged but prevents `exp` from overflowing to `Inf` (and the
quotient from becoming `NaN`) for large inputs.
"""
function softmax(matrix)
    # Nothing to normalise; also avoids calling `maximum` on an empty collection.
    isempty(matrix) && return exp.(matrix)
    shifted = exp.(matrix .- maximum(matrix))
    return shifted ./ sum(shifted)
end

Defining layers

Forward Prop

Convolution Layer 1

In [None]:
"""
    convolution_layer_one(x, W_1, W_2, stride, b_1, b_2)

First convolution layer: convolve the input `x` with each of the two kernels, add
the matching bias and apply `ReLU`.

# Arguments
- `x`: input matrix.
- `W_1`, `W_2`: kernels; the window size is taken from each kernel's own dimensions.
- `stride`: step between consecutive window positions.
- `b_1`, `b_2`: scalar biases paired with `W_1` and `W_2`.

# Returns
A tuple `(Matrix_1, Matrix_2)` holding the activated feature map of each kernel.
"""
function convolution_layer_one(x, W_1, W_2, stride, b_1, b_2)
    input_n, input_m = size(x, 1), size(x, 2)

    # The window size comes from the kernels themselves instead of a hard-coded 5,
    # so the layer stays correct if the kernels are resized.
    Matrix_1 = convolve(x, W_1, stride, input_n, input_m, size(W_1, 1), size(W_1, 2), b_1)
    Matrix_2 = convolve(x, W_2, stride, input_n, input_m, size(W_2, 1), size(W_2, 2), b_2)

    return ReLU(Matrix_1), ReLU(Matrix_2)
end


Pooling Layer 1

In [None]:
"""
    pooling_layer_one(Matrix_1, Matrix_2)

First pooling layer: apply `max_pooling` with a pool size of 2 and a stride of 2 to
each feature map and return the two pooled maps as a tuple, in input order.
"""
function pooling_layer_one(Matrix_1, Matrix_2)
    return map(feature_map -> max_pooling(feature_map, 2, 2), (Matrix_1, Matrix_2))
end

Convolution Layer 2

In [None]:
"""
    convolution_layer_two(Matrix_1, Matrix_2, stride, W_1, ..., W_8, b_1, ..., b_4)

Second convolution layer: build four feature maps from the two input maps.

Output map `k` combines both inputs through `convolve_2_kernels`, using kernel
`W_(2k-1)` on `Matrix_1`, kernel `W_(2k)` on `Matrix_2` and bias `b_k`, and is then
passed through `softmax`. The input side length is read from the first dimension of
`Matrix_1` and the window side length from the first dimension of `W_1`; both are
used for rows and columns alike.

# Returns
A tuple of the four activated feature maps, in kernel-pair order.
"""
function convolution_layer_two(Matrix_1, Matrix_2, stride, W_1, W_2, W_3, W_4, W_5, W_6, W_7, W_8, b_1, b_2, b_3, b_4)
    input_dim = size(Matrix_1)[1]
    kernel_dim = size(W_1)[1]

    # NOTE(review): softmax is applied across each whole feature map, whereas layer
    # one uses ReLU — confirm this activation choice is intentional.
    feature_map(kernel_a, kernel_b, bias) = softmax(
        convolve_2_kernels(
            Matrix_1, Matrix_2, kernel_a, kernel_b, stride,
            input_dim, input_dim, kernel_dim, kernel_dim, bias,
        ),
    )

    return (
        feature_map(W_1, W_2, b_1),
        feature_map(W_3, W_4, b_2),
        feature_map(W_5, W_6, b_3),
        feature_map(W_7, W_8, b_4),
    )
end

Pooling Layer 2

In [None]:
"""
    pooling_layer_two(Matrix_1, Matrix_2, Matrix_3, Matrix_4)

Second pooling layer: apply `max_pooling` with a pool size of 2 and a stride of 2 to
each of the four feature maps and return the pooled maps as a tuple, in input order.
"""
function pooling_layer_two(Matrix_1, Matrix_2, Matrix_3, Matrix_4)
    feature_maps = (Matrix_1, Matrix_2, Matrix_3, Matrix_4)
    return map(feature_map -> max_pooling(feature_map, 2, 2), feature_maps)
end

Fully Connected Layer

In [None]:
"""
    flatten(Matrix_1, Matrix_2, Matrix_3, Matrix_4)

Stack the four matrices on top of each other (`vcat`) and return the entries of the
stacked matrix as a single vector in column-major order. Because the stacking is
vertical, each column of the stack contributes entries from all four inputs before
the next column starts.
"""
function flatten(Matrix_1, Matrix_2, Matrix_3, Matrix_4)
    return vec(vcat(Matrix_1, Matrix_2, Matrix_3, Matrix_4))
end

In [None]:
"""
    fully_connected_layer(matrix, W, b)

Dense output layer: treat `matrix` as a single feature vector, compute
`W * features + b` and return the `softmax` of the result.

`matrix` is reshaped to one row and then transposed to a column, and `b` is
transposed as well, so `b` is expected as a row whose length matches the number of
rows of `W`. The result is a one-column matrix.
"""
function fully_connected_layer(matrix, W, b)
    feature_row = reshape(matrix, 1, :)
    logits = W * adjoint(feature_row) .+ adjoint(b)
    return softmax(logits)
end

Back Prop

Operational Functions

In [None]:
"""
    mean_sqrd_error(ŷ, y)

Mean squared error between the prediction `ŷ` and the target `y`.

For two plain numbers this is `(y - ŷ)^2`. For arrays (or an array paired with a
scalar) the element-wise squared differences are averaged, instead of applying `^2`
to the array itself, which is either undefined (vectors) or a matrix power (square
matrices).

# Throws
- `ArgumentError` if the inputs contain no elements.
"""
function mean_sqrd_error(ŷ, y)
    residual = y .- ŷ
    isempty(residual) &&
        throw(ArgumentError("mean_sqrd_error needs at least one element"))
    return sum(abs2, residual) / length(residual)
end

# Scalar method: keeps the plain `(y - ŷ)^2` result, including its type, for numbers.
mean_sqrd_error(ŷ::Number, y::Number) = (y - ŷ)^2

In [None]:
# Gradient-descent driver taking the target `y`, the prediction `ŷ`, the step size
# `⍺` and the number of `epochs` to loop over.
# NOTE(review): this definition is unfinished — the loop has no body and neither the
# `for` nor the `function` is closed with `end`, so it does not parse as written.
function gradient_descent(y, ŷ, ⍺, epochs)
    for _ in 1:epochs
        

Forward Prop

In [None]:
"""
    forward_prop(input, W_1_1, W_1_2, b_1_1, b_1_2, W_2_1, ..., W_2_8,
                 b_2_1, ..., b_2_4, W, b)

Run one input through the whole network and return the predicted class.

The pipeline is: convolution layer one (stride 1) → pooling layer one → convolution
layer two (stride 1) → pooling layer two → transpose of every pooled map → `flatten`
→ `fully_connected_layer`.

As a side effect the probabilities and their size are printed to standard output.

# Returns
The row index of the largest probability minus one, i.e. a zero-based class index
(presumably the predicted digit — confirm against the label encoding).
"""
function forward_prop(input, W_1_1, W_1_2, b_1_1, b_1_2, W_2_1, W_2_2, W_2_3, W_2_4, W_2_5, W_2_6, W_2_7, W_2_8, b_2_1, b_2_2, b_2_3, b_2_4, W, b)
    # Stage 1: two feature maps, then 2x2 pooling.
    conv_a, conv_b = convolution_layer_one(input, W_1_1, W_1_2, 1, b_1_1, b_1_2)
    pooled_a, pooled_b = pooling_layer_one(conv_a, conv_b)

    # Stage 2: four feature maps, then 2x2 pooling.
    conv_1, conv_2, conv_3, conv_4 = convolution_layer_two(
        pooled_a, pooled_b, 1,
        W_2_1, W_2_2, W_2_3, W_2_4, W_2_5, W_2_6, W_2_7, W_2_8,
        b_2_1, b_2_2, b_2_3, b_2_4,
    )
    pooled_1, pooled_2, pooled_3, pooled_4 = pooling_layer_two(conv_1, conv_2, conv_3, conv_4)

    # Each pooled map is transposed before flattening, which changes the order in
    # which its entries reach the dense layer.
    flattened_matrix = flatten(
        transpose(pooled_1), transpose(pooled_2), transpose(pooled_3), transpose(pooled_4)
    )

    probabilities = fully_connected_layer(flattened_matrix, W, b)
    print_matrix(probabilities)
    println(size(probabilities))

    # `argmax` on a matrix yields a CartesianIndex; component 1 is the row.
    best_row = argmax(probabilities)[1]
    return best_row - 1
end

In [None]:
# Run the network, with its current parameters, on the first training image and
# print the predicted class.
input = train_data[:, :, 1]
output = forward_prop(
    input,
    W_1_1, W_1_2, b_1_1, b_1_2,
    W_2_1, W_2_2, W_2_3, W_2_4, W_2_5, W_2_6, W_2_7, W_2_8,
    b_2_1, b_2_2, b_2_3, b_2_4,
    W, b,
)
println(output)

Back Prop

In [None]:
function back_prop(ŷ, y)
    loss = mean_sqrd_error(ŷ, y)
    