# Learning Signal and Ignoring Noise
# Introduction to Regularization & Batching

# Section 8.1/8.2

In [1]:
import sys, numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, y_train[0:1000])
one_hot_labels = np.zeros((len(labels),10))

for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))

for i,l in enumerate(y_test):
    test_labels[i][l] = 1

Using TensorFlow backend.


In [2]:
np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1
for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r"+ \
        " I:"+str(j)+ \
        " Error:" + str(error/float(len(images)))[0:5] +\
        " Correct:" + str(correct_cnt/float(len(images))))
    
    if(j % 10 == 0 or j == iterations-1):
        error, correct_cnt = (0.0, 0)
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)
        
            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
            " Test-Acc:" + str(correct_cnt/float(len(test_images))))
        print()

 I:0 Error:0.722 Correct:0.537 Test-Err:0.601 Test-Acc:0.6488
 I:10 Error:0.312 Correct:0.901 Test-Err:0.420 Test-Acc:0.8114
 I:20 Error:0.260 Correct:0.937 Test-Err:0.414 Test-Acc:0.8111
 I:30 Error:0.232 Correct:0.946 Test-Err:0.417 Test-Acc:0.8066
 I:40 Error:0.215 Correct:0.956 Test-Err:0.426 Test-Acc:0.8019
 I:50 Error:0.204 Correct:0.966 Test-Err:0.437 Test-Acc:0.7982
 I:60 Error:0.194 Correct:0.967 Test-Err:0.448 Test-Acc:0.7921
 I:70 Error:0.186 Correct:0.975 Test-Err:0.458 Test-Acc:0.7864
 I:80 Error:0.179 Correct:0.979 Test-Err:0.466 Test-Acc:0.7817
 I:90 Error:0.172 Correct:0.981 Test-Err:0.474 Test-Acc:0.7758
 I:100 Error:0.166 Correct:0.984 Test-Err:0.482 Test-Acc:0.7706
 I:110 Error:0.161 Correct:0.984 Test-Err:0.489 Test-Acc:0.7686
 I:120 Error:0.157 Correct:0.986 Test-Err:0.496 Test-Acc:0.766
 I:130 Error:0.153 Correct:0.999 Test-Err:0.502 Test-Acc:0.7622
 I:140 Error:0.149 Correct:0.991 Test-Err:0.508 Test-Acc:0.758
 I:150 Error:0.145 Correct:0.991 Test-Err:0.513 Test-

# Section 8.9


In [3]:
import numpy, sys
np.random.seed(1)

def relu(x):
    return (x >= 0) * x # returns x if x > 0

def relu2deriv(output):
    return output >= 0 #returns 1 for input > 0

alpha, iterations, hidden_size = (0.005, 300, 100)
pixels_per_image, num_labels = (784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) ==  np.argmax(test_labels[i:i+1]))
        sys.stdout.write("\n" + \
            "I:" + str(j) + \
            " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] + \
            " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+ \
            " Train-Err:" + str(error/ float(len(images)))[0:5] + \
            " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.641 Test-Acc:0.6333 Train-Err:0.891 Train-Acc:0.413
I:10 Test-Err:0.458 Test-Acc:0.787 Train-Err:0.472 Train-Acc:0.764
I:20 Test-Err:0.415 Test-Acc:0.8133 Train-Err:0.430 Train-Acc:0.809
I:30 Test-Err:0.421 Test-Acc:0.8114 Train-Err:0.415 Train-Acc:0.811
I:40 Test-Err:0.419 Test-Acc:0.8112 Train-Err:0.413 Train-Acc:0.827
I:50 Test-Err:0.409 Test-Acc:0.8133 Train-Err:0.392 Train-Acc:0.836
I:60 Test-Err:0.412 Test-Acc:0.8236 Train-Err:0.402 Train-Acc:0.836
I:70 Test-Err:0.412 Test-Acc:0.8033 Train-Err:0.383 Train-Acc:0.857
I:80 Test-Err:0.410 Test-Acc:0.8054 Train-Err:0.386 Train-Acc:0.854
I:90 Test-Err:0.411 Test-Acc:0.8144 Train-Err:0.376 Train-Acc:0.868
I:100 Test-Err:0.411 Test-Acc:0.7903 Train-Err:0.369 Train-Acc:0.864
I:110 Test-Err:0.411 Test-Acc:0.8003 Train-Err:0.371 Train-Acc:0.868
I:120 Test-Err:0.402 Test-Acc:0.8046 Train-Err:0.353 Train-Acc:0.857
I:130 Test-Err:0.408 Test-Acc:0.8091 Train-Err:0.352 Train-Acc:0.867
I:140 Test-Err:0.405 Test-Acc:0.8083 Train-Er

# Section 8.11

In [4]:
import numpy as np
np.random.seed(1)

def relu(x):
    return (x >= 0) * x # returns x if x > 0
def relu2deriv(output):
    return output >= 0 # returns 1 for input > 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)/batch_size
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))/batch_size
        layer_2_delta = labels[batch_start:batch_end] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
            
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) ==  np.argmax(test_labels[i:i+1]))
        sys.stdout.write("\n" + \
            "I:" + str(j) + \
            " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] + \
            " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+ \
            " Train-Err:" + str(error/ int(len(images) / batch_size))[0:5] + \
            " Train-Acc:" + str(correct_cnt/ int(len(images) / batch_size)))


I:0 Test-Err:0.815 Test-Acc:0.3832 Train-Err:1.272 Train-Acc:0.16100000000000012
I:10 Test-Err:0.569 Test-Acc:0.7183 Train-Err:0.591 Train-Acc:0.6719999999999902
I:20 Test-Err:0.508 Test-Acc:0.7577 Train-Err:0.530 Train-Acc:0.726999999999989
I:30 Test-Err:0.483 Test-Acc:0.7815 Train-Err:0.497 Train-Acc:0.7579999999999882
I:40 Test-Err:0.464 Test-Acc:0.7915 Train-Err:0.486 Train-Acc:0.7499999999999885
I:50 Test-Err:0.453 Test-Acc:0.7978 Train-Err:0.462 Train-Acc:0.7839999999999877
I:60 Test-Err:0.446 Test-Acc:0.8015 Train-Err:0.445 Train-Acc:0.8009999999999874
I:70 Test-Err:0.437 Test-Acc:0.8054 Train-Err:0.444 Train-Acc:0.8069999999999873
I:80 Test-Err:0.440 Test-Acc:0.807 Train-Err:0.450 Train-Acc:0.8029999999999873
I:90 Test-Err:0.437 Test-Acc:0.8059 Train-Err:0.444 Train-Acc:0.7979999999999874
I:100 Test-Err:0.437 Test-Acc:0.8029 Train-Err:0.436 Train-Acc:0.8049999999999873
I:110 Test-Err:0.431 Test-Acc:0.8024 Train-Err:0.420 Train-Acc:0.817999999999987
I:120 Test-Err:0.433 Test-Ac