# MNIST Part(III)

## using

In [46]:
#DO NOT IMPORT ANY OTHER LIBRARY
#Run these cells as it is
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import math
from tqdm import tqdm
from copy import deepcopy
import seaborn as sns
#!pip install plotly
try:
    import plotly.graph_objects as go
except:
    !pip install plotly # Or install the plotly library manually
    import plotly.graph_objects as go

# required only for jupyter notebooks / while running on colab
%matplotlib inline

# set seed
seed = 1
np.random.seed(seed)

## functions:

In [47]:
def Initialization(I, H1, H2, O):
    

    
    #START TODO ################
    
    W1 = np.random.randn(I,H1) * 0.1
    b1 = np.zeros((1, H1))

    W2 = np.random.randn(H1, H2) * 0.1
    b2 = np.zeros((1, H2))

    W3 = np.random.randn(H2,O) * 0.1
    b3 = np.zeros((1, O))
    
    return{
        "W1":W1,"b1":b1,
        "W2":W2,"b2":b2,
        "W3":W3,"b3":b3,
    } 

    
    #END TODO ###################
    raise NotImplementedError()

In [48]:
def Split_X_y_MNIST(df):
    X=df.iloc[:,1:].values/225
    Y=df.iloc[:,0].values
    return X,Y

In [49]:
def one_hot_encoding(y,num_class=10):
    return np.eye(10)[y]

In [50]:
def relu(x):
    return np.maximum(0, x)

def drelu(x):
    return (x > 0).astype(float)

In [51]:
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

In [52]:
def forward(x,params):
    
    z1 = np.dot(x, params['W1']) +params['b1']
    a1 = relu(z1)
    
    z2 = np.dot(a1, params['W2']) + params['b2']
    a2 = relu(z2)
    
    z3 = np.dot(a2, params['W3']) + params['b3']
    a3 = softmax(z3)

    
    return {"z1": z1,"a1": a1,
            "z2": z2,"a2": a2,
            "z3": z3,"a3": a3
             }

In [53]:
def compute_loss(y_true, y_pred):
    #cross entropy
    epsilon = 1e-15
    y_pred = np.clip(y_pred, epsilon, 1. - epsilon)
    
    
    loss = -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]
    
    return loss


In [54]:
def backward(x, y_true, l,params):
    m = x.shape[0]
    dz3 = l['a3'] - y_true
    dW3 = np.dot(l['a2'].T, dz3) / m
    db3 = np.sum(dz3, axis=0, keepdims=True) / m

    dz2 = np.dot(dz3,params['W3'].T) * drelu(l['z2'])
    dW2 = np.dot(l['a1'].T, dz2) / m
    db2 = np.sum(dz2, axis=0, keepdims=True) / m

    dz1 = np.dot(dz2, params['W2'].T) * drelu(l['z1'])
    dW1 = np.dot(x.T, dz1) / m
    db1 = np.sum(dz1, axis=0, keepdims=True) / m

    return{ "dW1":dW1,"db1": db1,"dW2": dW2,"db2": db2,"dW3": dW3,"db3": db3}


In [55]:
def regularization_L2(lmbda, W1, W2, W3, m):
    '''
    Inputs:
    - lmbda: Regularization parameter
    - W1, W2, W3, W4, W5: numpy arrays containing weights of the model
    - m: no of examples

    Outputs:
    - total_reg_loss: sum of L2 regularization loss of each layer
    '''

    # START TODO ################
    L2_cost=0
    L2_cost+=np.sum(np.square(W1))
    L2_cost+=np.sum(np.square(W2))
    L2_cost+=np.sum(np.square(W3))
    
    L2_cost=(lmbda/(2*m))*L2_cost
    return L2_cost
    # END TODO ###################
    raise NotImplementedError()

In [56]:
import numpy as np
import pandas as pd
import json

train_df = pd.read_csv("mnist_train.csv")
test_df = pd.read_csv("mnist_test.csv")

x_train, y_train = Split_X_y_MNIST(train_df)
x_test, y_test = Split_X_y_MNIST(test_df)

x_train = x_train.astype(np.float32).reshape(-1, 28 * 28)
x_test = x_test.astype(np.float32).reshape(-1, 28 * 28)

y_train = one_hot_encoding(y_train, 10)
y_test = one_hot_encoding(y_test, 10)

input_size = 28 * 28
hidden1_size = 256
hidden2_size = 64
output_size = 10
#init the weights and bias
params=Initialization(input_size,hidden1_size,hidden2_size,output_size)

learning_rate = 0.005
epochs = 50
batch_size = 64
lmbda=0.01




best_loss=1000
best_params=None

for epoch in range(epochs):
    #shuffle a data 
    indices = np.random.permutation(x_train.shape[0])
    x_train_shuffled = x_train[indices]
    y_train_shuffled = y_train[indices]
    loss=0
    num_iterate=0
    for i in range(0, x_train.shape[0], batch_size):
        x_batch = x_train_shuffled[i:i + batch_size]
        y_batch = y_train_shuffled[i:i + batch_size]

       
        l = forward(x_batch,params)
        a1=l["a1"]
        a2=l["a2"]
        a3=l["a3"]
        z1=l["z1"]
        z2=l["z2"]
        z3=l["z3"]

        loss += compute_loss(y_batch, a3)
        loss+=regularization_L2(lmbda,params['W1'],params['W2'],params['W3'],x_train.shape[0])

        num_iterate+=1
        grads= backward(x_batch, y_batch,l,params)

       
        params['W1'] -= learning_rate * grads['dW1']
        params['b1'] -= learning_rate * grads['db1']
        params['W2'] -= learning_rate * grads['dW2']
        params['b2'] -= learning_rate * grads['db2']
        params['W3'] -= learning_rate * grads['dW3']
        params['b3'] -= learning_rate * grads['db3']

    loss=loss/num_iterate
    print(f"epoch {epoch+1}/{epochs}, loss: {loss:.4f}")
    if(best_loss>loss):
        best_params=params


l = forward(x_test,best_params)
predictions = np.argmax(l['a3'], axis=1)
accuracy = np.mean(predictions == np.argmax(y_test, axis=1))


print(f"Test Accuracy: {accuracy * 100:.2f}%")

l=forward(x_train,best_params)
predictions = np.argmax(l['a3'], axis=1)
accuracy = np.mean(predictions == np.argmax(y_train, axis=1))

print(f"Train Accuracy: {accuracy * 100:.2f}%")






epoch 1/50, loss: 1.0911
epoch 2/50, loss: 0.4633
epoch 3/50, loss: 0.3707
epoch 4/50, loss: 0.3272
epoch 5/50, loss: 0.2983
epoch 6/50, loss: 0.2771
epoch 7/50, loss: 0.2597
epoch 8/50, loss: 0.2450
epoch 9/50, loss: 0.2324
epoch 10/50, loss: 0.2215
epoch 11/50, loss: 0.2115
epoch 12/50, loss: 0.2026
epoch 13/50, loss: 0.1944
epoch 14/50, loss: 0.1870
epoch 15/50, loss: 0.1800
epoch 16/50, loss: 0.1738
epoch 17/50, loss: 0.1680
epoch 18/50, loss: 0.1627
epoch 19/50, loss: 0.1576
epoch 20/50, loss: 0.1528
epoch 21/50, loss: 0.1486
epoch 22/50, loss: 0.1443
epoch 23/50, loss: 0.1403
epoch 24/50, loss: 0.1366
epoch 25/50, loss: 0.1332
epoch 26/50, loss: 0.1298
epoch 27/50, loss: 0.1265
epoch 28/50, loss: 0.1234
epoch 29/50, loss: 0.1206
epoch 30/50, loss: 0.1179
epoch 31/50, loss: 0.1150
epoch 32/50, loss: 0.1125
epoch 33/50, loss: 0.1101
epoch 34/50, loss: 0.1077
epoch 35/50, loss: 0.1052
epoch 36/50, loss: 0.1032
epoch 37/50, loss: 0.1010
epoch 38/50, loss: 0.0989
epoch 39/50, loss: 0.

In [57]:
import numpy as np
import tkinter as tk
from PIL import Image, ImageDraw, ImageOps
import pandas as pd
import json


def load_trained_model():
    

    if(best_params==None):
        with open("best_params.json",'r') as file:
            data=json.load(file)
            return data
    else:
        return best_params


def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)


def forward(x, model_params):
    W1, b1, W2, b2, W3, b3 = model_params["W1"], model_params["b1"], model_params["W2"], model_params["b2"], model_params["W3"], model_params["b3"]
    
    z1 = np.dot(x, W1) + b1
    a1 = relu(z1)
    
    z2 = np.dot(a1, W2) + b2
    a2 = relu(z2)
    
    z3 = np.dot(a2, W3) + b3
    a3 = softmax(z3)
    
    return a3 


def predict_digit(img_array):
    
    model_params = load_trained_model()
    output = forward(img_array, model_params)
    return np.argmax(output, axis=1)[0]

class DigitRecognizerApp:
    def __init__(self, root):
        self.root = root
        self.root.title("MNIST Digit Recognizer")

        self.canvas = tk.Canvas(self.root, width=280, height=280, bg='white')
        self.canvas.pack()

        self.button_frame = tk.Frame(self.root)
        self.button_frame.pack()

        self.predict_button = tk.Button(self.button_frame, text="Predict", command=self.predict)
        self.predict_button.pack(side=tk.LEFT, padx=10)

        self.clear_button = tk.Button(self.button_frame, text="Clear", command=self.clear_canvas)
        self.clear_button.pack(side=tk.LEFT, padx=10)

        self.label = tk.Label(self.root, text="Draw a digit and press Predict")
        self.label.pack()

        self.canvas.bind("<B1-Motion>", self.draw)

        self.image = Image.new("L", (280, 280), color=255)
        self.draw_img = ImageDraw.Draw(self.image)

    def draw(self, event):
        x, y = event.x, event.y
        r = 10
        self.canvas.create_oval(x - r, y - r, x + r, y + r, fill='black')
        self.draw_img.ellipse([x - r, y - r, x + r, y + r], fill=0)

    def clear_canvas(self):
        self.canvas.delete("all")
        self.draw_img.rectangle([0, 0, 280, 280], fill=255)
        self.label.config(text="Draw a digit and press Predict")

    def predict(self):
        img_resized = self.image.resize((28, 28))
        img_resized = ImageOps.invert(img_resized)

        img_array = np.asarray(img_resized) / 255.0
        img_array = img_array.reshape(1, 28*28).astype(np.float32)

        prediction = predict_digit(img_array)
        self.label.config(text=f"Predicted Digit: {prediction}")

if __name__ == "__main__":
    root = tk.Tk()
    app = DigitRecognizerApp(root)
    root.mainloop()
