In [32]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import roc_curve, auc, roc_auc_score,confusion_matrix,classification_report
from sklearn.preprocessing import label_binarize

In [None]:
# Read the train/test CSV exports. The "label" column supplies the targets and
# every column from "1x1" onward is taken as input features.
df = pd.read_csv("mnist_train.csv")
df_test = pd.read_csv("mnist_test.csv")

# Training set: labels as a 1-D array, inputs transposed to (features, samples)
# and divided by 255 (presumably 8-bit pixel values -- confirm against the CSV).
Y = np.array(df["label"])
X = np.array(df.loc[0:, "1x1":]).T / 255

# Test set, prepared exactly like the training set.
Y_test = np.array(df_test["label"])
X_test = np.array(df_test.loc[0:, "1x1":]).T / 255

In [None]:
def one_hot(Y, m, num_classes=10):
    """Encode integer class labels as a one-hot matrix with one column per sample.

    Parameters
    ----------
    Y : array-like of int, length ``m``
        Class index of every sample. Each value must be a valid row index
        into a ``num_classes``-row array.
    m : int
        Number of samples, i.e. the number of columns in the result.
    num_classes : int, default 10
        Number of rows in the result. The default keeps the ten-class
        behaviour existing callers rely on.

    Returns
    -------
    numpy.ndarray of shape ``(num_classes, m)``
        Float array of zeros with a single 1 per column, placed in the row
        given by that sample's label.
    """
    Y_hotCoded = np.zeros((num_classes, m))
    # Fill directly in (class, sample) layout; no transpose round trip needed.
    Y_hotCoded[Y, np.arange(m)] = 1

    return Y_hotCoded

# One-hot targets for the whole training set, one column per sample.
Y_hotCoded = one_hot(Y, m=Y.size)

In [None]:
def initiate_parameters(X, hidden_units=(512, 256), n_classes=10, scale=0.01):
    """Create the initial weights and biases of the three-layer network.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix; only ``X.shape[0]`` (the number of input features) is read.
    hidden_units : tuple of two ints, default ``(512, 256)``
        Widths of the first and second hidden layers.
    n_classes : int, default 10
        Width of the output layer.
    scale : float, default 0.01
        Factor applied to the standard-normal draws used for the weights.

    Returns
    -------
    dict
        Keys ``"W1", "b1", "W2", "b2", "W3", "b3"`` (in that order). Each
        ``W`` has shape ``(n_out, n_in)`` and each ``b`` is a zero column of
        shape ``(n_out, 1)``.
    """
    n_x = X.shape[0]
    n_h1, n_h2 = hidden_units
    layer_shapes = ((n_h1, n_x), (n_h2, n_h1), (n_classes, n_h2))

    parameters = {}
    # Weights are drawn in layer order (W1, W2, W3) from NumPy's global RNG, so
    # a fixed np.random.seed gives the same values as the hard-coded version.
    for layer, (n_out, n_in) in enumerate(layer_shapes, start=1):
        parameters[f"W{layer}"] = np.random.randn(n_out, n_in) * scale
        parameters[f"b{layer}"] = np.zeros((n_out, 1))

    return parameters


In [None]:
def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``."""
    return np.maximum(0, x)

def derive_relu(x):
    """ReLU derivative as a mask: True where ``x`` is strictly positive, False elsewhere."""
    is_positive = x > 0
    return is_positive

In [None]:
def softmax_activation(x):
    """Softmax along axis 0, so each column of the result sums to 1.

    The maximum along axis 0 is subtracted before exponentiating; this leaves
    the result unchanged mathematically but keeps ``np.exp`` from overflowing
    on large inputs.
    """
    shifted = x - x.max(axis=0)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=0, keepdims=True)

In [None]:
def forward_propagation(X, parameters):
    """Run one forward pass: two ReLU layers followed by a softmax output layer.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix; its first dimension must be compatible with ``W1 @ X``.
    parameters : dict
        Must contain ``"W1", "b1", "W2", "b2", "W3", "b3"``.

    Returns
    -------
    tuple
        ``(A3, cache)`` where ``A3`` is the softmax output and ``cache`` maps
        ``"Z1", "A1", "Z2", "A2", "Z3", "A3"`` to the intermediate values of
        this pass.
    """
    W1, b1, W2, b2, W3, b3 = (
        parameters[key] for key in ("W1", "b1", "W2", "b2", "W3", "b3")
    )

    # First hidden layer
    Z1 = W1 @ X + b1
    A1 = relu(Z1)

    # Second hidden layer
    Z2 = W2 @ A1 + b2
    A2 = relu(Z2)

    # Output layer
    Z3 = W3 @ A2 + b3
    A3 = softmax_activation(Z3)

    cache = dict(Z1=Z1, A1=A1, Z2=Z2, A2=A2, Z3=Z3, A3=A3)
    return A3, cache

In [None]:
def calculate_cost(Y, A3):
    """Mean negative log-likelihood of the true classes.

    Parameters
    ----------
    Y : array-like of int
        True class index per sample; used to pick one probability per sample.
    A3 : numpy.ndarray
        Predicted probabilities with one column per sample (``A3.shape[1]``
        is taken as the sample count).

    Returns
    -------
    numpy.float64
        Average of ``-log(p_true)`` over the samples, with probabilities
        clipped to ``[1e-7, 1 - 1e-7]`` so the logarithm stays finite.
    """
    m = A3.shape[1]
    probs = np.clip(A3.T, 1e-7, 1 - 1e-7)
    # Row i of `probs` belongs to sample i; pick the entry of its true class.
    picked = probs[np.arange(m), Y]
    return np.mean(-np.log(picked))

In [None]:
def backward_propagation(X, Y, parameters, cache):
    """Compute the gradients of the cost with respect to every weight and bias.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix of the forward pass; ``X.shape[1]`` is the sample count.
    Y : numpy.ndarray
        Targets with the same shape as ``cache["A3"]`` (one-hot columns when
        produced by ``one_hot``).
    parameters : dict
        Only ``"W2"`` and ``"W3"`` are read.
    cache : dict
        Forward-pass values; ``"Z1", "A1", "Z2", "A2", "A3"`` are read.

    Returns
    -------
    dict
        Keys ``"dW1", "db1", "dW2", "db2", "dW3", "db3"``, each averaged over
        the samples.
    """
    m = X.shape[1]
    inv_m = 1 / m

    # Only the values the gradients actually depend on are looked up.
    W2, W3 = parameters["W2"], parameters["W3"]
    Z1, A1 = cache["Z1"], cache["A1"]
    Z2, A2 = cache["Z2"], cache["A2"]
    A3 = cache["A3"]

    # Output layer: error term is the softmax output minus the targets.
    dZ3 = A3 - Y
    dW3 = (inv_m * dZ3) @ A2.T
    db3 = inv_m * np.sum(dZ3, axis=1, keepdims=True)

    # Second hidden layer: back-propagate through W3, gate by the ReLU mask.
    dZ2 = (W3.T @ dZ3) * derive_relu(Z2)
    dW2 = (inv_m * dZ2) @ A1.T
    db2 = inv_m * np.sum(dZ2, axis=1, keepdims=True)

    # First hidden layer.
    dZ1 = (W2.T @ dZ2) * derive_relu(Z1)
    dW1 = (inv_m * dZ1) @ X.T
    db1 = inv_m * np.sum(dZ1, axis=1, keepdims=True)

    gradient = {"dW1": dW1,
                "db1": db1,
                "dW2": dW2,
                "db2": db2,
                "dW3": dW3,
                "db3": db3}

    return gradient

In [None]:
def update_parameters(parameters, gradient, alpha=0.4):
    """Apply one gradient-descent step and return the new parameter dict.

    Parameters
    ----------
    parameters : dict
        Current values under ``"W1", "b1", "W2", "b2", "W3", "b3"``.
    gradient : dict
        Gradients under the same names prefixed with ``"d"``.
    alpha : float, default 0.4
        Learning rate.

    Returns
    -------
    dict
        A new dict with the six updated entries; the input dict and its
        arrays are not modified.
    """
    updated = {}
    for name in ("W1", "b1", "W2", "b2", "W3", "b3"):
        updated[name] = parameters[name] - alpha * gradient["d" + name]
    return updated

In [None]:
def predict(X, parameters):
    """Return the predicted class index for every column of ``X``.

    Runs a forward pass and takes the argmax of the output along axis 0.
    """
    probabilities = forward_propagation(X, parameters)[0]
    return np.argmax(probabilities, axis=0)

def accuracy(prediction, Y):
    """Percentage (0-100) of positions where ``prediction`` equals ``Y``, as a Python float."""
    n_correct = np.sum(Y == prediction)
    return float(n_correct / Y.size * 100)

In [None]:
def gradient_descent(X, Y, Y_hotCoded, iterations=1000):
    """Train with full-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix, passed unchanged to the forward and backward passes.
    Y : array-like of int
        Class indices, used only to compute the cost.
    Y_hotCoded : numpy.ndarray
        Targets in the layout ``backward_propagation`` subtracts from the
        network output.
    iterations : int, default 1000
        Number of update steps.

    Returns
    -------
    tuple
        ``(cost, parameters)``. ``cost`` is the float cost measured in the
        final iteration before that iteration's update (``0.0`` when
        ``iterations`` is 0).
    """
    parameters = initiate_parameters(X)
    cost = 0
    for _ in range(iterations):
        probabilities, cache = forward_propagation(X, parameters)
        cost = calculate_cost(Y, probabilities)
        step = backward_propagation(X, Y_hotCoded, parameters, cache)
        parameters = update_parameters(parameters, step)

    return float(cost), parameters

In [None]:
def mini_batch_GD(X, Y, num_epochs=15, batch_size=100):
    """Train with mini-batch gradient descent, reshuffling the data every epoch.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix with one column per sample.
    Y : numpy.ndarray of int
        Class index per sample, indexable by a permutation array.
    num_epochs : int, default 15
        Number of passes over the data.
    batch_size : int, default 100
        Number of samples per update step; the last batch of an epoch may be
        smaller.

    Returns
    -------
    tuple
        ``(cost, parameters, accuracy_)``. ``cost[e]`` is the sample-weighted
        mean of the mini-batch costs seen during epoch ``e`` (each measured
        before its update). ``accuracy_[e]`` is the accuracy on the full
        ``X``/``Y`` after epoch ``e``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    parameters = initiate_parameters(X)
    # One row per sample, so a single permutation shuffles inputs and labels together.
    samples = X.T
    n_samples = len(samples)
    cost = []
    accuracy_ = []
    for epoch in range(num_epochs):
        indices = np.random.permutation(n_samples)
        X_train_shuffled = samples[indices]
        y_train_shuffled = Y[indices]
        weighted_cost_sum = 0.0

        for start in range(0, n_samples, batch_size):
            X_batch = X_train_shuffled[start:start + batch_size].T
            y_batch = y_train_shuffled[start:start + batch_size]
            Y_hotCoded = one_hot(y_batch, y_batch.shape[0])

            A3, cache = forward_propagation(X_batch, parameters)
            # Accumulate rather than overwrite: the epoch cost should reflect
            # every batch, weighted by size because the last one may be short.
            weighted_cost_sum += calculate_cost(y_batch, A3) * y_batch.shape[0]
            gradient = backward_propagation(X_batch, Y_hotCoded, parameters, cache)
            parameters = update_parameters(parameters, gradient)

        # With no samples there are no batches; report 0.0 instead of dividing by zero.
        cost.append(float(weighted_cost_sum / n_samples) if n_samples else 0.0)
        prediction = predict(X, parameters)
        accuracy_.append(accuracy(prediction, Y))

    return cost, parameters, accuracy_

In [None]:
# Train with the default mini-batch settings, then print the last recorded
# epoch cost and training accuracy.
cost, parameters, accuracy_ = mini_batch_GD(X, Y)
print(cost[-1], accuracy_[-1])

# Network output on the test set, transposed to one row per sample for the
# AUC/ROC helpers used later in the notebook.
pred_prob_test = forward_propagation(X_test, parameters)[0].T

# Hard predictions on the test set; the bare expression below is the cell's
# displayed result.
prediction_test = predict(X_test, parameters)
accuracy(prediction_test, Y_test)

In [33]:
# Predict the training set once and reuse the result for both reports, rather
# than running the full forward pass twice.
prediction_train = predict(X, parameters)
cm_train = confusion_matrix(Y, prediction_train)
cr_train = classification_report(Y, prediction_train)

# Test-set metrics reuse the predictions computed in the training cell.
cm_test = confusion_matrix(Y_test, prediction_test)
cr_test = classification_report(Y_test, prediction_test)

In [None]:
def per_class_auc(y_true, y_pred, classes):
    """One-vs-rest ROC AUC for every class, rounded to three decimals.

    Parameters
    ----------
    y_true : array-like
        True class labels.
    y_pred : numpy.ndarray
        Scores with one row per sample; column ``i`` is scored against
        ``classes[i]``.
    classes : sequence
        Class labels, in the column order of ``y_pred``.

    Returns
    -------
    numpy.ndarray
        One AUC value per class.
    """
    y_true_bin = label_binarize(y_true, classes=classes)
    scores = []
    for i in range(len(classes)):
        scores.append(roc_auc_score(y_true_bin[:, i], y_pred[:, i]))
    return np.round(np.array(scores), 3)

In [None]:
from itertools import cycle
from sklearn.metrics import auc

def plot_roc_curves(y_true, y_score, classes, title):
    """Draw one one-vs-rest ROC curve per class on a single figure and show it.

    Parameters
    ----------
    y_true : array-like
        True class labels.
    y_score : numpy.ndarray
        Scores with one row per sample; column ``i`` belongs to ``classes[i]``.
    classes : sequence
        Class labels, in the column order of ``y_score``.
    title : str
        Figure title.
    """
    y_true_bin = label_binarize(y_true, classes=classes)
    # Four colours, reused cyclically when there are more classes than colours.
    palette = cycle(['aqua', 'darkorange', 'cornflowerblue', 'green'])

    plt.figure(figsize=(6, 4))
    for i, color in zip(range(len(classes)), palette):
        fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_score[:, i])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, color=color, lw=2, label=f'Class {classes[i]} (AUC = {roc_auc:.3f})')

    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--', lw=1)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

In [None]:

# Per-class AUC on the test set, then the matching ROC curves.
# per_class_auc already returns an ndarray, so no extra np.array wrapper is needed.
aucs = per_class_auc(Y_test, pred_prob_test, classes=range(10))
print(f"Neural Network AUCs: {aucs}")
plot_roc_curves(
    Y_test,
    pred_prob_test,
    classes=range(10),
    title="Neural Network ROC- Test",
)


In [None]:
from PIL import Image

# Show one test sample as an image next to the model's prediction for it.
n = 624  # index of the test sample to inspect

# Column n of the (features, samples) matrix, laid out as a 28x28 grid.
pixels = X_test[:, n].reshape(28, 28)
# Undo the /255 scaling applied at load time so the values fit 8-bit grayscale.
pixels_uint8 = (pixels * 255).astype(np.uint8)
img = Image.fromarray(pixels_uint8, mode='L')  # 'L' mode is for grayscale

print(prediction_test[n])
img