The input training data consists of 28x28 pixel images of digits and the corresponding value of the digit.

The file is a CSV file in which each row holds all the pixel values of one image (between 0 and 255) together with the value of the digit that the pixel image represents.

There are therefore 785 columns in each row (28*28 = 784 pixel values + 1 label).
There are 42K rows corresponding to 42K training images.
Let's explore the data below.

In [131]:
# importing necessary libraries
import numpy as np
import pandas as pd

In [132]:
# Load the training and test sets from their CSV files
TRAIN_CSV, TEST_CSV = "/content/train_digit.csv", "/content/test_digit.csv"
train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)

In [133]:
# Dimensions of the training frame as (rows, columns)
train_data.shape

(42000, 785)

In [134]:
# Preview the first rows of the training frame
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [135]:
# Separate the digit labels from the remaining (pixel) columns as NumPy arrays;
# the pixel values are divided by 255 to rescale them
label_column = 'label'
y0 = train_data[label_column].values
X0 = train_data.drop(columns=[label_column]).values / 255

In [136]:
# Fix the seed so the shuffle (and therefore the split) is reproducible
np.random.seed(42)

# Put the row positions of the dataset into a random order
indices = np.arange(len(X0))
np.random.shuffle(indices)

# Fraction of rows used for training; the remainder is held out for testing
train_ratio = 0.8
train_size = int(train_ratio * indices.size)

# Cut the shuffled positions at the training-set size
train_indices, test_indices = indices[:train_size], indices[train_size:]

# Index features and labels with the same positions so rows stay aligned
X_train, y_train = X0[train_indices], y0[train_indices]
X_test, y_test = X0[test_indices], y0[test_indices]

In [137]:
# Report the dimensions of every array produced by the split
print("Shapes of input variables")
for caption, array in (('X train =', X_train),
                       ('y train =', y_train),
                       ('X test =', X_test),
                       ('y test =', y_test)):
    print(caption, array.shape)

Shapes of input variables
X train = (33600, 784)
y train = (33600,)
X test = (8400, 784)
y test = (8400,)


In [138]:
def one_hot_encoding(y, num_classes=10):
  """Convert integer class labels into a one-hot encoded matrix.

  Parameters
  ----------
  y : array-like of int
      Class labels, one per sample. The input is flattened to 1-D first.
  num_classes : int, default 10
      Number of columns of the result (the default covers the digits 0-9).

  Returns
  -------
  numpy.ndarray
      Float array of shape (number of labels, num_classes) holding a 1 in
      column ``y[i]`` of row ``i`` and 0 everywhere else.

  Raises
  ------
  ValueError
      If a label is negative. Negative indices would otherwise wrap around
      and silently mark the wrong class.
  IndexError
      If a label is >= num_classes (raised by NumPy indexing).
  """
  labels = np.asarray(y).ravel()
  one_hot_y = np.zeros((labels.size, num_classes))
  if labels.size:
    if labels.min() < 0:
      raise ValueError("labels must be non-negative integers")
    # Vectorised assignment: one (row, label) coordinate pair per sample
    one_hot_y[np.arange(labels.size), labels] = 1
  return one_hot_y

In [140]:
# Sanity check: encode the training labels and show one label next to its encoded row
one_hot_y = one_hot_encoding(y_train)
sample_index = 2
print(y_train[sample_index])
print(one_hot_y[sample_index])

9
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [141]:
# Shapes of the training features and of the one-hot encoded training labels
X_train.shape,one_hot_y.shape

((33600, 784), (33600, 10))

In [160]:
def neuralnet_shape(X, Y, n_h):
    """Determine and print the layer sizes of the network.

    X   : feature matrix; its second dimension is used as the input layer size
    Y   : labels; they are one-hot encoded and the encoded width is used as
          the output layer size
    n_h : number of neurons in the hidden layer (returned unchanged)

    Returns the tuple (n_x, n_h, n_y).
    """
    encoded_width = one_hot_encoding(Y).shape[1]
    n_x, n_y = X.shape[1], encoded_width

    for caption, size in (('shape of input_layer_neurons', n_x),
                          ('shape of hidden_layer_neuron', n_h),
                          ('shape of ouput_layer_neurons', n_y)):
        print(caption, size)

    return n_x, n_h, n_y

In [161]:
# Layer sizes for the training data, requesting 15 hidden-layer neurons
n_x,n_h,n_y = neuralnet_shape(X_train,y_train,15)

shape of input_layer_neurons 784
shape of hidden_layer_neuron 15
shape of ouput_layer_neurons 10


In [162]:
def initialize_parameters(n_x, n_h, n_y):
    """Build the starting weights and biases of the two-layer network.

    n_x : size of the input layer
    n_h : size of the hidden layer
    n_y : size of the output layer

    Returns a dict with keys "w1" (n_h, n_x), "b1" (n_h, 1), "w2" (n_y, n_h)
    and "b2" (n_y, 1). Weights are standard-normal draws scaled by 0.1;
    biases start at zero.
    """
    def _scaled_normal(rows, cols):
        # Random weights drawn from N(0, 1) and scaled by 0.1
        return np.random.randn(rows, cols) * 0.1

    # Dict entries are evaluated top to bottom, so the hidden-layer weights
    # are drawn before the output-layer weights.
    return {
        "w1": _scaled_normal(n_h, n_x),
        "b1": np.zeros((n_h, 1)),
        "w2": _scaled_normal(n_y, n_h),
        "b2": np.zeros((n_y, 1)),
    }


In [163]:
# Initial weights and biases for the layer sizes computed above
parameters = initialize_parameters(n_x,n_h,n_y)

In [164]:
# sigmoid function

def sigmoid(z):
    """Element-wise logistic sigmoid, 1 / (1 + exp(-z)).

    The exponential is only ever taken of a non-positive number, so large
    negative inputs no longer overflow ``np.exp``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array with the shape of ``z``.
    """
    z = np.asarray(z)
    e = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function
    s = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    return s[()]  # turns a 0-d result into a scalar, leaves arrays as they are

In [165]:
def forward_propagation(X, parameters):
    """Run one forward pass through the two-layer network.

    X          : input matrix; it is transposed before being multiplied by
                 w1, so it is expected as (samples, features)
    parameters : dict holding 'w1', 'b1', 'w2' and 'b2'

    Returns (a2, cache), where a2 is the sigmoid output of the second layer
    and cache is a dict with the intermediate values z1, a1, z2 and a2.
    """
    w1, b1, w2, b2 = (parameters[key] for key in ('w1', 'b1', 'w2', 'b2'))

    # Hidden layer: affine transform followed by tanh
    z1 = np.dot(w1, X.T) + b1
    a1 = np.tanh(z1)

    # Output layer: affine transform followed by sigmoid
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)

    return a2, dict(z1=z1, a1=a1, z2=z2, a2=a2)

In [166]:
# Forward pass over the training features with the current parameters
a2, cache = forward_propagation(X_train,parameters)

In [167]:
def cost_function(a2, Y, parameters):
    """Cross-entropy cost of the network output, averaged over the samples.

    a2         : output activations, laid out as (classes, samples) so that
                 they line up with the transposed one-hot labels
    Y          : integer labels, one per sample
    parameters : not used by this function; kept so existing callers that
                 pass it keep working

    Returns the cost as a Python float.
    """
    m = Y.shape[0]
    Y_one = one_hot_encoding(Y)

    # Keep the activations strictly inside (0, 1): a saturated sigmoid would
    # otherwise give log(0) = -inf, and 0 * -inf = nan, in the sum below.
    eps = 1e-15
    a2 = np.clip(a2, eps, 1 - eps)

    # Element-wise binary cross-entropy. Each column of `logs` belongs to one
    # sample; summing every entry gives the total over all samples and classes.
    logs = np.multiply(np.log(a2), Y_one.T) + np.multiply((1 - Y_one.T), np.log(1 - a2))
    cost = - np.sum(logs) / m
    cost = float(np.squeeze(cost))

    return cost

In [168]:
# Cost of the forward-pass output a2 against the training labels
cost1  = cost_function(a2,y_train,parameters)

In [169]:
# Show the cost computed in the previous cell
print(cost1)

6.99813621307548


In [170]:
def backward_propagation(parameters, cache , X, Y):
    """Compute the gradients of the cost for both layers.

    parameters : dict of weights; only 'w2' is needed here
    cache      : dict from the forward pass; 'a1' and 'a2' are read
    X          : input matrix, with one sample per row
    Y          : integer labels, one per sample

    Returns a dict with the gradients "dw1", "db1", "dw2" and "db2".
    """
    m = X.shape[0]
    targets = one_hot_encoding(Y).T

    w2 = parameters['w2']
    a1, a2 = cache['a1'], cache['a2']

    # Output layer: the error term is activation minus target
    dz2 = a2 - targets
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer: propagate the error back through w2 and scale it by the
    # derivative of tanh, 1 - a1^2
    tanh_slope = 1 - np.power(a1, 2)
    dz1 = np.multiply(np.dot(w2.T, dz2), tanh_slope)
    dw1 = (1 / m) * np.dot(dz1, X)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    return {"dw1": dw1, "db1": db1, "dw2": dw2, "db2": db2}

In [171]:
def update_parameters(parameters, grads, alpha=0.09):
    """Apply one gradient-descent step to every weight and bias.

    Parameters
    ----------
    parameters : dict
        Current values under the keys "w1", "b1", "w2" and "b2".
    grads : dict
        Gradients under the keys "dw1", "db1", "dw2" and "db2".
    alpha : float, default 0.09
        Learning rate. The default is the value that used to be hard-coded.

    Returns
    -------
    dict
        New dict with the updated "w1", "b1", "w2" and "b2". The input
        arrays are not modified.
    """
    return {
        name: parameters[name] - alpha * grads["d" + name]
        for name in ("w1", "b1", "w2", "b2")
    }

In [172]:
def nn_model(X,Y,n_h,num_iterations):
    """Train the two-layer network with full-batch gradient descent.

    X              : training inputs, with one sample per row
    Y              : integer training labels
    n_h            : number of hidden-layer neurons
    num_iterations : number of gradient-descent steps to run

    Prints the cost every 100 iterations and returns the trained parameters
    dict ("w1", "b1", "w2", "b2").
    """
    n_x,n_h,n_y = neuralnet_shape(X,Y,n_h)
    parameters = initialize_parameters(n_x,n_h,n_y)
    for i in range(0,num_iterations):

        a2,cache = forward_propagation(X,parameters)
        grads = backward_propagation(parameters,cache,X,Y)
        parameters = update_parameters(parameters,grads)

        if i%100==0:
            # The cost is only reported, never used for the update, so it is
            # evaluated on logging iterations only. a2 still holds the output
            # computed before this iteration's update.
            cost = cost_function(a2,Y,parameters)
            print('Cost after iteration %i : %f'%(i,cost))
    return parameters

In [173]:
# Train on the training split with 15 hidden neurons for 1800 iterations
parameters = nn_model(X_train,y_train,15,1800)

shape of input_layer_neurons 784
shape of hidden_layer_neuron 15
shape of ouput_layer_neurons 10
Cost after iteration 0 : 6.990952
Cost after iteration 100 : 2.594073
Cost after iteration 200 : 2.015274
Cost after iteration 300 : 1.675138
Cost after iteration 400 : 1.426215
Cost after iteration 500 : 1.238785
Cost after iteration 600 : 1.102211
Cost after iteration 700 : 1.003864
Cost after iteration 800 : 0.930868
Cost after iteration 900 : 0.874582
Cost after iteration 1000 : 0.829741
Cost after iteration 1100 : 0.793044
Cost after iteration 1200 : 0.762357
Cost after iteration 1300 : 0.736242
Cost after iteration 1400 : 0.713698
Cost after iteration 1500 : 0.694003
Cost after iteration 1600 : 0.676619
Cost after iteration 1700 : 0.661136


In [177]:
def prediction(parameters, X):
    """Predict a class index for every sample in X.

    Runs a forward pass and, for each column of the output activations,
    returns the row index of the largest activation.
    """
    output_activations = forward_propagation(X, parameters)[0]
    return np.argmax(output_activations, axis=0)

In [178]:
predicted_values = prediction(parameters, X_test)

# Count the held-out digits whose predicted label equals the true label
is_correct = predicted_values == y_test
correct_predictions = is_correct.sum()
total_digits = y_test.size

In [179]:
# Accuracy as a percentage, rounded to one decimal place
accuracy_percent = np.round((correct_predictions / total_digits) * 100, 1)
print('Correct Predictions: ', correct_predictions, sep='')
print('No. of digits tested: ', total_digits, sep='')
print('Accuracy: ', accuracy_percent, '%', sep='')

Correct Predictions: 7608
No. of digits tested: 8400
Accuracy: 90.6%
