# 3 Layer Network on MNIST

In [None]:
using MLDatasets
train_x, train_y = MNIST.traindata()
test_x,  test_y  = MNIST.testdata();

In [None]:
(images, labels) = (reshape(train_x[:,:,1:1000], (1000, 28*28)) ./ 255.0, train_y[1:1000])
one_hot_labels = zeros(length(labels), 10)
for (i,l) in enumerate(labels)
    one_hot_labels[i,l+1] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (size(test_x)[end], 28*28)) ./ 255.0
test_labels = zeros((length(test_y),10))

for (i,l) in enumerate(test_y)
    test_labels[i,l+1] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images)[1]
        layer_0 = images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1])   \r")
end
        

Train error: 0.9991914936776255% Train accuracy: 0.112   

In [None]:
if(j % 10 == 0 or j == iterations)
    Error, Correct_cnt = (0.0, 0)

    for i = 1:size(test_images)[1]
        
        layer_0 = test_images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2
        
        Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))


    end
    print("Test-Err:: $(Error/size(test_images)[1])% Test-Acc:: $(Correct_cnt/size(test_images)[1])   \r")
end

In [133]:
(images, labels) = (reshape(train_x[:,:,1:1000], (1000, 28*28)) ./ 255.0, train_y[1:1000])
one_hot_labels = zeros(length(labels), 10)
for (i,l) in enumerate(labels)
    one_hot_labels[i,l+1] = 1.0
end
labels = one_hot_labels

test_images = reshape(test_x, (size(test_x)[end], 28*28)) ./ 255.0
test_labels = zeros((length(test_y),10))

for (i,l) in enumerate(test_y)
    test_labels[i,l+1] = 1.0
end

using Random
Random.seed!(1)


relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images)[1]
        layer_0 = images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i] .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2) .* relu2deriv.(layer_1)
        weights_1_2 += alpha .* layer_1 * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    print("I: $(j) Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1])   \r")
    
    if ((j % 10 == 0) || (j == iterations))
        println()
        Error, Correct_cnt = (0.0, 0)
        for i = 1:size(test_images)[1]
        
            layer_0 = test_images[i,:]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
            Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))
        end
        print("Test-Err:: $(Error/size(test_images)[1])% Test-Acc:: $(Correct_cnt/size(test_images)[1]) \r")
        println()
    end
end

DimensionMismatch: DimensionMismatch("A has dimensions (1,10) but B has dimensions (40,10)")

# Dropout In Code

In [None]:
i = 1
layer_0 = images[i,:]
dropout_mask = bitrand(size(layer_1))
layer_1 = relu.(layer_0' * weights_0_1)
layer_1 .*= dropout_mask .* 2
layer_2 = layer_1 * weights_1_2

Error += sum((labels[i,:]' .- layer_2) .^ 2)
Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

layer_2_delta = (labels[i]' .- layer_2)
layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

layer_1_delta .*= dropout_mask


In [130]:
using Random
Random.seed!(1)

relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:size(images)[1]
        layer_0 = images[i,:]
        layer_1 = relu.(layer_0' * weights_0_1)
        dropout_mask = bitrand(size(layer_1))
        layer_1 .*= dropout_mask .* 2
        layer_2 = layer_1 * weights_1_2

        Error += sum((labels[i,:]' .- layer_2) .^ 2)
        Correct_cnt += Int(argmax(layer_2) == argmax(labels[i,:]'))

        layer_2_delta = (labels[i]' .- layer_2)
        layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

        layer_1_delta .*= dropout_mask
        
        weights_1_2 += alpha .* layer_1' * layer_2_delta
        weights_0_1 += alpha .* layer_0 * layer_1_delta
    end
    
    if (j % 10 == 0)
        test_Error, test_Correct_cnt = (0.0, 0)
        for i = 1:size(test_images)[1]
        
            layer_0 = test_images[i,:]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            test_Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
            test_Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))
        end
        println("I: $(j) Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1]) Test-Err:: $(Error/size(test_images)[1])% Test-Acc:: $(Correct_cnt/size(test_images)[1])")
    end
end

I: 10 Train error: 0.998787979997298% Train accuracy: 0.095 Test-Err:: 0.09987879799972979% Test-Acc:: 0.0095
I: 20 Train error: 0.9978306286667931% Train accuracy: 0.097 Test-Err:: 0.0997830628666793% Test-Acc:: 0.0097
I: 30 Train error: 0.996516667364948% Train accuracy: 0.108 Test-Err:: 0.09965166673649481% Test-Acc:: 0.0108


InterruptException: InterruptException:

# Batch Gradient Descent

In [None]:
using Random
Random.seed!(1)

relu(x) = x > 0 ? x : 0
relu2deriv(x) = x > 0 ? 1 : 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2 .* rand(pixels_per_image,hidden_size) .- 0.1
weights_1_2 = 0.2 .* rand(hidden_size,num_labels) .- 0.1

for j = 1:iterations
    Error, Correct_cnt = (0.0, 0)
    for i = 1:batch_size:size(images)[1]-batch_size
        batch_start, batch_end = i, i+batch_size-1
        layer_0 = images[batch_start:batch_end,:]
        layer_1 = relu.(layer_0 * weights_0_1)
        dropout_mask = bitrand(size(layer_1))
        layer_1 .*= (dropout_mask .* 2)
        layer_2 = layer_1 * weights_1_2
        
        Error += sum((labels[batch_start:batch_end,:] .- layer_2) .^ 2)
        
        for k=1:batch_size
            Correct_cnt += Int(argmax(layer_2[k,:]) == argmax(labels[batch_start+k-1,:]))
            layer_2_delta = (labels[batch_start:batch_end,:] .- layer_2) ./batch_size
            layer_1_delta = (layer_2_delta * weights_1_2') .* relu2deriv.(layer_1)

            layer_1_delta .*= dropout_mask

            weights_1_2 += alpha .* layer_1 * layer_2_delta
            
            println(size(layer_1), size(layer_2_delta))
            @assert false
            weights_0_1 += alpha .* layer_0' * layer_1_delta
        end
    end
        
    if (j % 10 == 0)
        test_Error, test_Correct_cnt = (0.0, 0)
        for i = 1:size(test_images)[1]

            layer_0 = test_images[i,:]
            layer_1 = relu.(layer_0' * weights_0_1)
            layer_2 = layer_1 * weights_1_2

            test_Error += sum((test_labels[i,:]' .- layer_2) .^ 2)
            test_Correct_cnt += Int(argmax(layer_2) == argmax(test_labels[i,:]'))
        end
        println("I: $(j) Train error: $(Error/size(images)[1])% Train accuracy: $(Correct_cnt/size(images)[1]) Test-Err:: $(test_Error/size(test_images)[1])% Test-Acc:: $(test_Correct_cnt/size(test_images)[1])")
    end
end

