# 3 Layer Network on MNIST

In [None]:
using MLDatasets
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata();

In [None]:
(images, labels) = (reshape(train_x[:,:,1:1000], (1000, 28*28)) ./ 255.0, train_y[1:1000])
one_hot_labels = zeros(length(labels), 10)
for (i,l) in enumerate(labels)
    one_hot_labels[i,l+1] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (size(test_x)[end], 28*28)) ./ 255.0
test_labels = zeros((length(test_y),10))

for (i,l) in enumerate(test_y)
    test_labels[i,l+1] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images)[1]
        layer_0 = images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1])   \r")
end
        

In [None]:
if(j % 10 == 0 or j == iterations)
    Error, Correct_cnt = (0.0, 0)

    for i = 1:size(test_images)[1]
        
        layer_0 = test_images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2
        
        Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))


    end
    print("Test-Err:: $(Error/size(test_images)[1])% Test-Acc:: $(Correct_cnt/size(test_images)[1])   \r")
end

In [None]:
(images, labels) = (reshape(train_x[:,:,1:1000], (1000, 28*28)) ./ 255.0, train_y[1:1000])
one_hot_labels = zeros(length(labels), 10)
for (i,l) in enumerate(labels)
    one_hot_labels[i,l+1] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (size(test_x)[end], 28*28)) ./ 255.0
test_labels = zeros((length(test_y),10))

for (i,l) in enumerate(test_y)
    test_labels[i,l+1] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images)[1]
        layer_0 = images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("I: $(j) Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1])   \r")
    
    if ((j % 10 == 0) || (j == iterations))
        println()
        Error, Correct_cnt = (0.0, 0)
        for i = 1:size(test_images)[1]
        
            layer_0 = test_images[i,:]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
            Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))
        end
        print("Test-Err:: $(Error/size(test_images)[1])% Test-Acc:: $(Correct_cnt/size(test_images)[1]) \r")
        println()
    end
end

# Dropout In Code

In [None]:
i = 1
layer_0 = images[i,:]
dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta

layer_0 = images[i:i+1]
dropout_mask = np.random.randint(2,size=layer_1.shape)

layer_1 *= dropout_mask * 2
layer_2 = np.dot(layer_1, weights_1_2)

error += np.sum((labels[i:i+1] - layer_2) ** 2)

correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))

layer_2_delta = (labels[i:i+1] - layer_2)
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

layer_1_delta *= dropout_mask

weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

In [75]:
layer_0 = images[1,:]
layer_1 = relu.(layer_0' * weights_0_1)
layer_2 = layer_1 * weights_1_2
Error, Correct_cnt = (0.0, 0)
Error += sum((labels[1,:]' .- layer_2) .^ 2)
Correct_cnt += Int(argmax(layer_2) == argmax(labels[1,:]'))
layer_2_delta = (labels[1,:]' .- layer_2)
layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

1×40 Array{Float64,2}:
 0.0  0.0125772  0.0290307  0.130989  …  0.0787766  -0.0470649  0.0

In [76]:
Error

0.9787734709551295

In [None]:
layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)

In [72]:
size(layer_1_delta)

# for i,l in enumerate(y_test):
#     test_labels[i][l] = 1
    
# np.random.seed(1)
# relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
# relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
# alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

(1, 40)

In [43]:
layer_2'

10×1 LinearAlgebra.Adjoint{Any,Array{Any,2}}:
  0.00019057039418527043
 -0.0004017092607755022
  3.906512950117453e-6
  0.00021406501181451382
  3.872879737752826e-6
  0.0005775486359465876
 -1.8004695510317768e-5
 -0.0004627779056668574
  0.00011401794918601783
  0.0007726419149043299

In [None]:
images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])