<a href="https://colab.research.google.com/github/ikeasamoahansah/Grokking-Deep-Learning/blob/main/nnfs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import sys, numpy as np
from keras.datasets import mnist

In [11]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [12]:
images, labels = (X_train[0:1000].reshape(1000, 28*28) / 255, y_train[0:1000])

In [13]:
one_hot_labels = np.zeros((len(labels), 10))

In [14]:
for i, l in enumerate(labels):
  one_hot_labels[i][l] = 1
labels = one_hot_labels

In [15]:
test_images = X_test.reshape(len(X_test), 28*28)/255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
  test_labels[i][l] = 1
np.random.seed(1)

relu = lambda x: (x>=0) * x
relu2deriv = lambda x: x>=0

alpha, iterations, hidden_size, pixels_per_image, num_labels = \
(0.005, 350, 40, 784, 10)

In [20]:
weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

In [21]:
for j in range(iterations):
  error, correct_cnt = (0.0, 0)

  for i in range(len(images)):
    layer_0 = images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)
    error += np.sum((labels[i:i+1] - layer_2) ** 2) #mse
    correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

    layer_2_delta = (labels[i:i+1] - layer_2)
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

    weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
    weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

  sys.stdout.write("\r" + \
                   " I:"+str(j)+\
                   " Error:" +str(error/float(len(images)))[0:5] +\
                   " Correct:" +str(correct_cnt/float(len(images))))

 I:349 Error:0.100 Correct:0.999

In [22]:
if(j % 10 == 0 or j == iterations-1):
  error, correct_cnt = (0.0, 0)

  for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)

    error += np.sum((test_labels[i:i+1]-layer_2) ** 2)
    correct_cnt += int(np.argmax(layer_2)) == np.argmax(test_labels[i:i+1])

  sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] + \
                   " Test Acc:" + str(correct_cnt/float(len(test_images))))
  print()

 Test-Err:0.624 Test Acc:0.6986


# Adding dropout to improve Accuracy

In [23]:
for j in range(iterations):
  error, correct_cnt = (0.0, 0)

  for i in range(len(images)):
    layer_0 = images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))

    dropout_mask = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask * 2

    layer_2 = np.dot(layer_1, weights_1_2)
    error += np.sum((labels[i:i+1] - layer_2) ** 2) #mse
    correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))

    layer_2_delta = (labels[i:i+1] - layer_2)
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

    # dropout
    layer_1_delta *= dropout_mask

    weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
    weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

  sys.stdout.write("\r" + \
                   " I:"+str(j)+\
                   " Error:" +str(error/float(len(images)))[0:5] +\
                   " Correct:" +str(correct_cnt/float(len(images))))

 I:349 Error:0.376 Correct:0.846

In [24]:
if(j % 10 == 0 or j == iterations-1):
  error, correct_cnt = (0.0, 0)

  for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)

    error += np.sum((test_labels[i:i+1]-layer_2) ** 2)
    correct_cnt += int(np.argmax(layer_2)) == np.argmax(test_labels[i:i+1])

  sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] + \
                   " Test Acc:" + str(correct_cnt/float(len(test_images))))
  print()

 Test-Err:0.430 Test Acc:0.7964


# Batch Gradient Descent

In [29]:
np.random.seed(1)

def relu(x):
  return (x >= 0) * x

def relu2deriv(output):
  return output >= 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

In [33]:
for j in range(iterations):
  error, correct_cnt = (0.0, 0)

  for i in range(int(len(images)/batch_size)):
    batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
    layer_0 = images[batch_start:batch_end] #change here
    layer_1 = relu(np.dot(layer_0, weights_0_1))

    dropout_mask = np.random.randint(2, size=layer_1.shape)
    layer_1 *= dropout_mask * 2
    layer_2 = np.dot(layer_1, weights_1_2)

    error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2) #mse
    for k in range(batch_size):
      correct_cnt += int(np.argmax(layer_2[k:k+1]) == \
                        np.argmax(labels[batch_start+k:batch_start+k+1]))

      layer_2_delta = (labels[batch_start:batch_end] - layer_2)/batch_size
      layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

      # dropout
      layer_1_delta *= dropout_mask

      weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
      weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

  if(j % 10 == 0 or j == iterations-1):
    test_error, test_correct_cnt = (0.0, 0)

    for i in range(len(test_images)):
      layer_0 = test_images[i:i+1]
      layer_1 = relu(np.dot(layer_0, weights_0_1))
      layer_2 = np.dot(layer_1, weights_1_2)

      error += np.sum((test_labels[i:i+1]-layer_2) ** 2)
      correct_cnt += int(np.argmax(layer_2)) == np.argmax(test_labels[i:i+1])

    sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] + \
                   " Test Acc:" + str(correct_cnt/float(len(test_images))))
    print()

 Test-Err:0.470 Test Acc:0.884
 Test-Err:0.466 Test Acc:0.8802
 Test-Err:0.464 Test Acc:0.8846
 Test-Err:0.463 Test Acc:0.8937
 Test-Err:0.463 Test Acc:0.8853
 Test-Err:0.464 Test Acc:0.8844
 Test-Err:0.462 Test Acc:0.8912
 Test-Err:0.461 Test Acc:0.8835
 Test-Err:0.466 Test Acc:0.8772
 Test-Err:0.464 Test Acc:0.8853
 Test-Err:0.465 Test Acc:0.8844
 Test-Err:0.465 Test Acc:0.8842
 Test-Err:0.458 Test Acc:0.8842
 Test-Err:0.462 Test Acc:0.8859
 Test-Err:0.459 Test Acc:0.8909
 Test-Err:0.458 Test Acc:0.8908
 Test-Err:0.458 Test Acc:0.8845
 Test-Err:0.455 Test Acc:0.8881
 Test-Err:0.451 Test Acc:0.8873
 Test-Err:0.452 Test Acc:0.8873
 Test-Err:0.454 Test Acc:0.8855
 Test-Err:0.454 Test Acc:0.8805
 Test-Err:0.452 Test Acc:0.889
 Test-Err:0.455 Test Acc:0.8911
 Test-Err:0.455 Test Acc:0.8932
 Test-Err:0.452 Test Acc:0.8972
 Test-Err:0.451 Test Acc:0.8929
 Test-Err:0.449 Test Acc:0.8924
 Test-Err:0.451 Test Acc:0.8868
 Test-Err:0.447 Test Acc:0.8897
 Test-Err:0.449 Test Acc:0.8836
