<a href="https://colab.research.google.com/github/gunjanvinzuda/handwritten_digit_recognition/blob/main/network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#import libraries
from keras.datasets import mnist
import numpy as np
import random

In [3]:
# Load the MNIST dataset through Keras. The call returns two (inputs, labels)
# pairs: one for the training split and one for the test split.
(x_train0,y_train0), (x_test0, y_test0) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Hold out the first 10000 examples of the original training split as the
# validation set; the remaining 50000 examples form the training set.
x_val, x_train = x_train0[:10000].copy(), x_train0[10000:].copy()
y_val, y_train = y_train0[:10000].copy(), y_train0[10000:].copy()
# The 10000-example test split is copied over unchanged.
x_test, y_test = x_test0.copy(), y_test0.copy()

# Report the size of each split, one per line: train, validation, test.
print(len(x_train), len(x_val), len(x_test), sep="\n")

50000
10000
10000


In [5]:
# Function for converting a digit label to one-hot column-vector form.
def vectorized(j, num_classes=10):
  """Return a one-hot column vector encoding the class index ``j``.

  Args:
    j: integer class index (for this notebook, a digit 0-9).
    num_classes: length of the returned vector. Defaults to 10, which
      matches the ten digit classes used in this notebook.

  Returns:
    A float array of shape ``(num_classes, 1)`` that is 1.0 at row ``j``
    and 0.0 everywhere else.

  Raises:
    IndexError: if ``j`` is outside the valid index range for
      ``num_classes`` rows.
  """
  one_hot = np.zeros((num_classes, 1))
  one_hot[j] = 1.0
  return one_hot

In [6]:
def scale_to_column(image):
  """Flatten one input array into a column vector and divide it by 255."""
  return image.reshape(-1, 1) / 255

# Convert every input of every split into a scaled column vector.
# NOTE: this cell rebinds the names it reads, so running it a second time
# without re-running the split cell would divide the inputs by 255 again.
x_val = list(map(scale_to_column, x_val))
x_train = list(map(scale_to_column, x_train))
x_test = list(map(scale_to_column, x_test))

# Only the training labels are one-hot encoded; the validation and test
# labels stay as plain digits.
y_train = list(map(vectorized, y_train))

In [7]:
# Pair every input with its label, giving one list of (x, y) tuples per split.
training_data = list(zip(x_train, y_train))
test_data = list(zip(x_test, y_test))
val_data = list(zip(x_val, y_val))

In [8]:
# Neural network made of 3 layers: an input layer of 784 neurons, a hidden
# layer of 50 neurons, and an output layer of 10 neurons (one per digit).
layers = [784,50,10]
mini_batch_size = 10
epochs = 100
eta = 0.5  # learning rate

# Seed the NumPy generator (used for parameter initialisation) and the
# `random` module (used for shuffling the training data) so that a fresh
# Restart & Run All reproduces the same training run.
seed = 0
np.random.seed(seed)
random.seed(seed)

# Number of examples in each split.
n_train = len(y_train)
n_val = len(y_val)
n_test = len(y_test)

In [9]:
# One weight matrix per pair of adjacent layers, shaped (fan_out, fan_in):
# standard-normal samples divided by the square root of the fan-in.
weights = [
  np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
  for fan_in, fan_out in zip(layers[:-1], layers[1:])
]
# One standard-normal bias column vector per non-input layer.
biases = [np.random.randn(layer_size, 1) for layer_size in layers[1:]]

In [10]:
# Logistic sigmoid activation.
def sigmoid(z):
  """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``."""
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator
# Derivative of the sigmoid function.
def sigmoid_prime(z):
  """Return the element-wise derivative of the sigmoid evaluated at ``z``.

  Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))``. The
  sigmoid is evaluated once and reused, rather than computed twice.
  """
  s = sigmoid(z)
  return s*(1-s)

def feedforward(a):
  """Propagate the input ``a`` through every layer and return the output.

  Each layer applies ``sigmoid(w . activation + b)`` using the module-level
  ``weights`` and ``biases`` lists, in order.
  """
  activation = a
  for layer_w, layer_b in zip(weights, biases):
    pre_activation = np.dot(layer_w, activation) + layer_b
    activation = sigmoid(pre_activation)
  return activation

def backprop(x,y):
  """Compute the gradients for a single example ``(x, y)`` by backpropagation.

  Reads the module-level ``weights``, ``biases`` and ``layers``.

  Returns:
    A tuple ``(grad_w, grad_b)`` of lists, with one array per layer, shaped
    like the corresponding entries of ``weights`` and ``biases``.
  """
  grad_w = [np.zeros(w.shape) for w in weights]
  grad_b = [np.zeros(b.shape) for b in biases]

  # Forward pass: remember every weighted input and every activation.
  activation = x
  activations = [activation]
  weighted_inputs = []
  for w, b in zip(weights, biases):
    weighted = np.dot(w, activation) + b
    weighted_inputs.append(weighted)
    activation = sigmoid(weighted)
    activations.append(activation)

  # Error at the output layer: (output - target) * sigmoid'(z), which is the
  # gradient of a squared-error cost with respect to the weighted input.
  error = (activations[-1] - y) * sigmoid_prime(weighted_inputs[-1])
  grad_b[-1] = error
  grad_w[-1] = np.dot(error, activations[-2].T)

  # Backward pass: `back` counts layers from the end (2 = second-to-last).
  for back in range(2, len(layers)):
    next_layer_w = weights[1 - back]
    error = np.dot(next_layer_w.T, error) * sigmoid_prime(weighted_inputs[-back])
    grad_b[-back] = error
    grad_w[-back] = np.dot(error, activations[-back - 1].T)
  return (grad_w, grad_b)

def update(mini_batch):
  """Apply one gradient-descent step computed from ``mini_batch``.

  Sums the per-example gradients returned by ``backprop`` over the batch,
  then rebinds the module-level ``weights`` and ``biases`` to the values
  moved by ``eta / len(mini_batch)`` times the summed gradient.

  Args:
    mini_batch: sequence of ``(x, y)`` training pairs. An empty batch is a
      no-op.
  """
  global weights, biases
  # An empty batch carries no gradient; returning early also avoids the
  # division by zero in the step-size computation below.
  if len(mini_batch) == 0:
    return

  nabla_w = [np.zeros(w.shape) for w in weights]
  nabla_b = [np.zeros(b.shape) for b in biases]

  for x,y in mini_batch:
    delta_nabla_w, delta_nabla_b = backprop(x,y)
    nabla_w = [nw+dnw for nw,dnw in zip(nabla_w, delta_nabla_w)]
    nabla_b = [nb+dnb for nb,dnb in zip(nabla_b, delta_nabla_b)]

  # Learning rate divided by the batch size, i.e. the step uses the mean gradient.
  step = eta/len(mini_batch)
  weights = [w-step*nw for w, nw in zip(weights,nabla_w)]
  biases = [b-step*nb for b, nb in zip(biases,nabla_b)]

def evaluate(data):
  """Return how many ``(x, y)`` pairs in ``data`` are classified correctly.

  The predicted class is the index of the largest output activation, and
  ``y`` is compared against that index directly, so labels must be plain
  class indices (not one-hot vectors).
  """
  correct = 0
  for x, y in data:
    predicted = np.argmax(feedforward(x))
    correct += int(predicted == y)
  return correct

In [11]:
# Training: each epoch shuffles the training data, splits it into
# mini-batches, and applies one gradient-descent update per mini-batch.
for j in range(1,epochs+1):
  random.shuffle(training_data)
  mini_batches = [training_data[k:k+mini_batch_size] for k in range(0, n_train, mini_batch_size)]
  for mini_batch in mini_batches:
    update(mini_batch)
  # Progress is measured on the validation set, so the denominator must be
  # the validation-set size (n_val), not the test-set size.
  print(f"Epoch {j}: {evaluate(val_data)} / {n_val} ")

Epoch 1: 9258 / 10000 
Epoch 2: 9384 / 10000 
Epoch 3: 9456 / 10000 
Epoch 4: 9527 / 10000 
Epoch 5: 9560 / 10000 
Epoch 6: 9563 / 10000 
Epoch 7: 9600 / 10000 
Epoch 8: 9607 / 10000 
Epoch 9: 9609 / 10000 
Epoch 10: 9635 / 10000 
Epoch 11: 9641 / 10000 
Epoch 12: 9634 / 10000 
Epoch 13: 9656 / 10000 
Epoch 14: 9661 / 10000 
Epoch 15: 9644 / 10000 
Epoch 16: 9667 / 10000 
Epoch 17: 9658 / 10000 
Epoch 18: 9679 / 10000 
Epoch 19: 9678 / 10000 
Epoch 20: 9684 / 10000 
Epoch 21: 9689 / 10000 
Epoch 22: 9686 / 10000 
Epoch 23: 9683 / 10000 
Epoch 24: 9697 / 10000 
Epoch 25: 9690 / 10000 
Epoch 26: 9693 / 10000 
Epoch 27: 9692 / 10000 
Epoch 28: 9694 / 10000 
Epoch 29: 9705 / 10000 
Epoch 30: 9703 / 10000 
Epoch 31: 9701 / 10000 
Epoch 32: 9712 / 10000 
Epoch 33: 9690 / 10000 
Epoch 34: 9690 / 10000 
Epoch 35: 9689 / 10000 
Epoch 36: 9696 / 10000 
Epoch 37: 9698 / 10000 
Epoch 38: 9698 / 10000 
Epoch 39: 9700 / 10000 
Epoch 40: 9708 / 10000 
Epoch 41: 9700 / 10000 
Epoch 42: 9705 / 10000 
E

In [12]:
# Final check: number of correctly classified examples in the test set.
print(evaluate(test_data))

9696


In [13]:
import pickle
# Save the layer sizes and the learned parameters to a file with pickle.
# NOTE: the file is a binary pickle despite the ".txt" extension; only load
# it back with pickle from a source you trust.
data = {
  "layers": layers,
  "weights": weights,
  "biases": biases,
}

# The context manager closes the file even if pickle.dump raises.
with open("model.txt","wb") as f:
  pickle.dump(data,f)