In [1]:
#library imports numpy, pandas and matplotlib
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt


In [2]:
# Load the two CSV files with pandas; header=None means the first row is data, not column names.
# df_train is used for training the network, df_test for testing it afterwards.
df_train = pd.read_csv("data/dataSet1.csv", header=None)
df_test = pd.read_csv("data/dataSet2.csv", header=None)


In [3]:
# Convert both DataFrames to plain NumPy arrays so columns can be sliced directly.
train, test = df_train.to_numpy(), df_test.to_numpy()


In [4]:
# Sanity check: display the training array's shape as (rows, columns).
train.shape


(2810, 65)

In [5]:
# Split each array into network inputs and target labels:
#   columns 0..63 -> the 64 pixel values of one sample (X_*)
#   column 64     -> the label for that sample, i.e. the number it shows (Y_*)
X_train, Y_train = train[:, :64], train[:, 64]

# The test array is split the same way.
X_test, Y_test = test[:, :64], test[:, 64]


In [6]:
# Hyperparameters of the network and of its training loop.
h = 64                  # number of neurons in the hidden layer
training_cycles = 1200  # number of training iterations
learning_rate = 1e-3    # step size applied to every gradient update


In [7]:
# Generate the initial weights and biases of the network
def init_params(h, n_inputs=64, n_outputs=10):
    """Create the starting parameters of the two-layer network.

    Weights are drawn from a standard normal distribution (global NumPy
    random state) and scaled by sqrt(2 / fan_in), where fan_in is the number
    of values feeding into the layer. Biases start at zero.

    Parameters
    ----------
    h : int
        Number of neurons in the hidden layer.
    n_inputs : int, optional
        Number of input values per sample. Defaults to 64, the value that was
        previously hard-coded.
    n_outputs : int, optional
        Number of output neurons. Defaults to 10, the value that was
        previously hard-coded.

    Returns
    -------
    tuple
        (w1, b1, w2, b2) with shapes (h, n_inputs), (h, 1), (n_outputs, h)
        and (n_outputs, 1).

    Raises
    ------
    ValueError
        If any of the three sizes is smaller than 1.
    """
    if min(h, n_inputs, n_outputs) < 1:
        raise ValueError(
            "h, n_inputs and n_outputs must all be >= 1, got "
            f"h={h}, n_inputs={n_inputs}, n_outputs={n_outputs}"
        )

    # w1 is drawn before w2 so that a seeded run produces the same numbers as before.
    w1 = np.random.randn(h, n_inputs) * np.sqrt(2. / n_inputs)
    b1 = np.zeros((h, 1))
    w2 = np.random.randn(n_outputs, h) * np.sqrt(2. / h)
    b2 = np.zeros((n_outputs, 1))
    return w1, b1, w2, b2


In [8]:
# Rectified Linear Unit (ReLU) activation
def ReLU(Z):
    """Return the element-wise maximum of 0 and Z, so negative entries become 0."""
    rectified = np.maximum(0, Z)
    return rectified


In [9]:
# Logistic sigmoid activation
def sigmoid(Z):
    """Element-wise logistic sigmoid 1 / (1 + exp(-Z)), computed without overflow.

    Evaluating exp(-Z) directly overflows to inf (with a RuntimeWarning) when
    Z is a large negative number. Here exp() is only applied to -|Z|, whose
    value is never positive, and the matching algebraic form is chosen per
    element:

        Z >= 0:  1 / (1 + exp(-Z))
        Z <  0:  exp(Z) / (1 + exp(Z))

    Parameters
    ----------
    Z : scalar or array-like
        Input values. Non-floating input is converted to float first.

    Returns
    -------
    NumPy scalar or ndarray
        A scalar for scalar input, otherwise an array with the shape of Z.
    """
    Z = np.asarray(Z)
    if not np.issubdtype(Z.dtype, np.floating):
        Z = Z.astype(float)

    decay = np.exp(-np.abs(Z))  # always in (0, 1], so it cannot overflow
    result = np.where(Z >= 0, 1 / (1 + decay), decay / (1 + decay))

    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result


In [10]:
# Forward propagation through the network
def feed_forward(w1, b1, w2, b2, X):
    """Run X through the hidden layer and the output layer.

    X is transposed before the first product, so each row of X is treated as
    one sample and each column of the results corresponds to one sample.

    Returns
    -------
    tuple
        (Z1, A1, Z2, A2): the hidden layer's weighted sum, its ReLU
        activation, the output layer's weighted sum, and its sigmoid
        activation.
    """
    hidden_sum = np.dot(w1, X.T) + b1            # weighted inputs plus hidden bias
    hidden_act = ReLU(hidden_sum)                # hidden layer activation
    output_sum = np.dot(w2, hidden_act) + b2     # weighted hidden activations plus output bias
    output_act = sigmoid(output_sum)             # output layer activation

    return hidden_sum, hidden_act, output_sum, output_act


In [11]:
# One-hot encode the labels.
# Example: a label of 5 sets row index 5 (the sixth row) of that sample's column to 1.
def map_labels(Y, num_classes=10):
    """Convert a 1-D sequence of class labels into a one-hot matrix.

    Parameters
    ----------
    Y : array-like of shape (m,)
        Class labels. Integer-valued floats (e.g. 5.0) are accepted and
        converted to integers.
    num_classes : int, optional
        Number of distinct classes, i.e. number of rows in the result.
        Defaults to 10 (labels 0-9).

    Returns
    -------
    ndarray of int, shape (num_classes, m)
        Column i holds a single 1 in row Y[i]; every other entry is 0.

    Raises
    ------
    ValueError
        If Y is not one-dimensional, contains non-integer values, or contains
        a label outside 0 .. num_classes - 1. Negative labels are rejected
        explicitly because NumPy indexing would otherwise wrap them around to
        a different class.
    """
    labels = np.asarray(Y)
    if labels.ndim != 1:
        raise ValueError(f"Y must be one-dimensional, got shape {labels.shape}")

    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(int)
        if not np.array_equal(as_int, labels):
            raise ValueError("Y must contain only integer-valued labels")
        labels = as_int

    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(f"labels must lie in the range 0..{num_classes - 1}")

    mapped_label = np.zeros((num_classes, labels.size), dtype=int)
    # Row = label value, column = sample position; sets every 1 in one step.
    mapped_label[labels, np.arange(labels.size)] = 1
    return mapped_label


In [12]:
# Backpropagation: gradients used to train the network
def back_prop(Z1, A1, Z2, A2, w2, X, Y):
    """Compute the gradients for both layers, averaged over all samples.

    Z1 and Z2 are accepted for symmetry with feed_forward's outputs but are
    not read by this function.

    Returns
    -------
    tuple
        (dw1, db1, dw2, db2): gradients for w1, b1, w2 and b2.
    """
    sample_count = Y.size
    averaging = 1 / sample_count
    targets = map_labels(Y)  # one-hot labels, one column per sample

    # Output layer: error is the difference between activation and target.
    # NOTE(review): this is the gradient form that goes with a sigmoid output
    # and a cross-entropy loss; the loss itself is never computed here - confirm intent.
    output_error = A2 - targets
    grad_w2 = averaging * output_error.dot(A1.T)
    grad_b2 = averaging * np.sum(output_error, axis=1, keepdims=True)

    # Hidden layer: push the error back through w2; the (A1 > 0) mask zeroes
    # the error wherever the ReLU output was not positive.
    hidden_error = w2.T.dot(output_error) * (A1 > 0)
    grad_w1 = averaging * hidden_error.dot(X)
    grad_b1 = averaging * np.sum(hidden_error, axis=1, keepdims=True)

    return grad_w1, grad_b1, grad_w2, grad_b2


In [13]:
# Apply one gradient-descent step to every parameter
def update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha):
    """Return the parameters after subtracting alpha times their gradients.

    alpha is the learning rate. The inputs are not modified; new values are
    returned as the tuple (w1, b1, w2, b2).
    """
    current = (w1, b1, w2, b2)
    gradients = (dw1, db1, dw2, db2)
    return tuple(param - alpha * grad for param, grad in zip(current, gradients))


In [14]:
# Turn output activations into predicted labels
def get_predictions(A2):
    """Return, for each column of A2, the row index holding the largest value."""
    strongest_rows = np.argmax(A2, axis=0)
    return strongest_rows

In [15]:
def get_accuracy(predictions, Y):
    """Return the percentage of predictions that equal the true labels.

    The labels are compared with the predictions directly; one-hot encoding
    them and taking the argmax again would only reproduce Y.

    Parameters
    ----------
    predictions : array-like
        Predicted label for every sample.
    Y : array-like
        True label for every sample; must have the same shape as predictions.

    Returns
    -------
    NumPy float
        Accuracy in percent (0-100).

    Raises
    ------
    ValueError
        If predictions and Y do not have the same shape.

    Side effects
    ------------
    Prints both the predictions and the true labels.
    """
    predictions = np.asarray(predictions)
    Y = np.asarray(Y)
    if predictions.shape != Y.shape:
        raise ValueError(
            f"predictions has shape {predictions.shape} but Y has shape {Y.shape}"
        )

    accuracy = np.sum(predictions == Y) / Y.size * 100
    print("Predictions:", predictions)
    print("True Labels:", Y)
    return accuracy


In [16]:
def train_network(X_train, Y_train, iterations, alpha, hidden_units=None):
    """Train the network with full-batch gradient descent.

    Every iteration runs a forward pass over all of X_train, computes the
    gradients and applies one parameter update.

    Parameters
    ----------
    X_train : ndarray
        Training inputs, one sample per row.
    Y_train : ndarray
        Training labels, one per sample.
    iterations : int
        Number of update steps to perform.
    alpha : float
        Learning rate passed to update_params.
    hidden_units : int, optional
        Size of the hidden layer. When omitted, the notebook-level
        hyperparameter ``h`` is used, which matches the earlier behaviour.

    Returns
    -------
    tuple
        The trained parameters (w1, b1, w2, b2).

    Side effects
    ------------
    Prints the iteration number and training accuracy every 100 iterations.
    """
    if hidden_units is None:
        # Fall back to the global so existing four-argument calls keep working.
        hidden_units = h
    w1, b1, w2, b2 = init_params(hidden_units)

    for i in range(iterations):
        Z1, A1, Z2, A2 = feed_forward(w1, b1, w2, b2, X_train)
        dw1, db1, dw2, db2 = back_prop(Z1, A1, Z2, A2, w2, X_train, Y_train)
        w1, b1, w2, b2 = update_params(w1, b1, w2, b2, dw1, db1, dw2, db2, alpha)

        if i % 100 == 0:
            # A2 comes from the forward pass made before this iteration's update.
            print("Iteration:", i)
            print("Accuracy:", get_accuracy(get_predictions(A2), Y_train))

    return w1, b1, w2, b2

In [None]:
# Train on the training set using the hyperparameters defined earlier in the notebook.
w1, b1, w2, b2 = train_network(
    X_train,
    Y_train,
    iterations=training_cycles,
    alpha=learning_rate,
)

Iteration: 0
Predictions: [5 5 9 ... 5 5 9]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 9.466192170818506
Iteration: 100
Predictions: [5 7 0 ... 5 5 0]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 11.281138790035588
Iteration: 200
Predictions: [6 7 0 ... 5 9 0]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 13.202846975088969
Iteration: 300
Predictions: [6 7 0 ... 5 9 0]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 15.836298932384341
Iteration: 400
Predictions: [6 7 2 ... 5 9 0]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 17.580071174377224
Iteration: 500
Predictions: [6 7 2 ... 5 9 0]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 19.644128113879002
Iteration: 600
Predictions: [6 7 2 ... 5 9 1]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 21.494661921708186
Iteration: 700
Predictions: [6 7 2 ... 5 9 1]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 23.594306049822062
Iteration: 800
Predictions: [0 4 2 ... 5 9 1]
True Labels: [0 0 7 ... 8 1 7]
Accuracy: 25.516014234875446
Iteration: 900
Predictions: [0 4 2 ... 4 9 1]
Tru

In [None]:
def predict(X, Y, index, w1, b1, w2, b2):
    """Show one sample as an image together with its predicted and actual label.

    A forward pass is run over all of X with the given parameters; the
    prediction at position ``index`` is then compared with Y[index]. The first
    64 values of that row are drawn as an 8x8 grayscale image, and the index
    and both labels are printed. Nothing is returned.
    """
    # Only the output activations are needed from the forward pass.
    _, _, _, output_act = feed_forward(w1, b1, w2, b2, X)

    predicted_label = get_predictions(output_act)[index]
    actual_label = Y[index]

    # Rebuild the 8x8 image from the sample's 64 pixel values.
    pixels = X[index, :64].reshape(8, 8)

    plt.imshow(pixels, cmap='gray')
    plt.axis('off')  # no axes, so only the image itself is shown
    plt.title(f"Predicted: {predicted_label}, Actual: {actual_label}")
    plt.show()

    # Text summary for the chosen sample, one item per line.
    print(
        f"Index: {index}",
        f"Predicted Label: {predicted_label}",
        f"Actual Label: {actual_label}",
        sep="\n",
    )


In [None]:
# Choose one test sample and show the network's prediction next to its true label.
index = 91
predict(X_test, Y_test, index=index, w1=w1, b1=b1, w2=w2, b2=b2)
