<a href="https://colab.research.google.com/github/anishana/Text-Recognition-on-a-MNIST-dataset/blob/main/Metrics_SVM_Logistic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from google.colab import drive,files
import matplotlib.pyplot as plt
import numpy as np 
import seaborn as sns
from sklearn.metrics import confusion_matrix,accuracy_score,f1_score,roc_curve,roc_auc_score
from keras.datasets import mnist


# Mount Google Drive at /content/drive; the prediction CSVs read further down
# in this notebook are loaded from a folder under that mount point.
drive.mount('/content/drive')
# Load the MNIST train/test splits; each split unpacks into an (x, y) pair that
# the rest of the notebook uses as images (x_*) and their digit labels (y_*).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Mounted at /content/drive
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
def show_images(x_train, n=5):
  """Show the first ``n * n`` images of ``x_train`` in an ``n`` x ``n`` grid.

  Args:
    x_train: Sequence/array of 2-D images indexed along its first axis
      (``x_train[i]`` is the i-th image).
    n: Number of rows and columns of the grid. Defaults to 5, the value that
      used to be hard-coded.

  If fewer than ``n * n`` images are available, the remaining grid cells are
  left blank instead of raising an IndexError.
  """
  # squeeze=False keeps `axs` a 2-D array even for n == 1.
  _, axs = plt.subplots(nrows=n, ncols=n, sharex=True, sharey=True,
                        figsize=(12, 12), squeeze=False)
  n_images = min(n * n, len(x_train))
  for i, ax in enumerate(axs.flat):  # row-major: same order as axs[i // n, i % n]
    if i < n_images:
      # Cast to float BEFORE the (x + 1) / 2 rescale. On an integer image
      # (e.g. uint8) the `+ 1` is evaluated in the integer dtype, so a pixel
      # at the maximum value (255) silently wraps around to 0.
      image = np.asarray(x_train[i], dtype=float)
      ax.imshow((image + 1) / 2)
    ax.axis('off')
  plt.tight_layout()
  plt.show()

In [None]:
def display_bar_graph(df_label, df_counts, label, count, title):
  """Draw a bar chart of ``df_counts`` against ``df_label`` on a new figure.

  Args:
    df_label: Bar positions / category labels (x values).
    df_counts: Bar heights, one per entry of ``df_label``.
    label: Text for the x-axis label.
    count: Text for the y-axis label.
    title: Title of the chart.
  """
  fig = plt.figure(figsize=(9, 6))
  ax = fig.add_subplot(111)
  # Use the `df_label` argument; the previous body read a global named
  # `df_labels`, which only existed when the caller happened to define it.
  ax.bar(df_label, df_counts)
  ax.set_xlabel(label)
  ax.set_ylabel(count)
  ax.set_title(title)
  # Lay out after the axes and labels exist so they are taken into account.
  fig.tight_layout()

In [None]:
def display_accuracy_score(y_true,y_pred):
  """Plot the confusion matrix of ``y_pred`` vs ``y_true`` as a heatmap.

  The heatmap is drawn on a new 9x9 figure whose y-axis is labelled
  'Actual label' and x-axis 'Predicted label'; the figure title reports the
  accuracy score of the predictions.

  Args:
    y_true: Ground-truth labels.
    y_pred: Predicted labels, aligned with ``y_true``.
  """
  matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
  plt.figure(figsize=(9, 9))
  heatmap_options = dict(
      annot=True,
      fmt="0",
      linewidths=.5,
      square=True,
      cmap='Blues_r',
  )
  sns.heatmap(matrix, **heatmap_options)
  plt.ylabel('Actual label')
  plt.xlabel('Predicted label')
  accuracy = accuracy_score(y_true, y_pred)
  plt.title('Accuracy Score: {0}'.format(accuracy), size=15)

In [None]:
def example_predictions(test, result, n_examples=6, max_index=270):
  """Show randomly chosen test images titled with their predicted labels.

  Args:
    test: Indexable collection of images; ``test[i]`` must be reshapeable to
      (28, 28).
    result: Predicted labels, positionally aligned with ``test``.
    n_examples: Number of images to draw (default 6, the former fixed value).
    max_index: Exclusive upper bound of the index range sampled from
      (default 270, the former fixed value). It is clamped to the number of
      available samples so small inputs no longer raise IndexError.

  Raises:
    ValueError: If there is no sample to draw from.
  """
  upper = min(max_index, len(test), len(result))
  if upper <= 0:
    raise ValueError('example_predictions needs at least one sample to display')
  # Positional lookup: a pandas Series would otherwise be indexed by label.
  predictions = np.asarray(result)
  # Uses NumPy's global RNG; seed it beforehand for a reproducible selection.
  chosen = np.random.randint(0, upper, n_examples)
  plt.figure(figsize=(20, 4))
  for plot_index, i in enumerate(chosen):
    plt.subplot(1, n_examples, plot_index + 1)
    plt.title('Predicted Label: {0}'.format(predictions[i]))
    plt.imshow(np.reshape(test[i], (28, 28)))

In [None]:
def misclassified_images(y_test, y_pred, X_test, max_images=5):
  """Plot the first misclassified samples with predicted and actual labels.

  Args:
    y_test: Ground-truth labels.
    y_pred: Predicted labels, positionally aligned with ``y_test``.
    X_test: Indexable collection of images; ``X_test[i]`` must be reshapeable
      to (28, 28).
    max_images: Maximum number of misclassified samples to draw (default 5,
      the former fixed value).
  """
  misclassified_indexes = [
      index
      for index, (label, predict) in enumerate(zip(y_test, y_pred))
      if label != predict
  ]

  # The indexes above are positions, so look the labels up by position too.
  # Indexing a pandas Series with `series[i]` is label-based and would pick
  # the wrong entry (or raise) when its index is not 0..n-1.
  actual = np.asarray(y_test)
  predicted = np.asarray(y_pred)

  plt.figure(figsize=(20, 4))
  for plot_index, bad_index in enumerate(misclassified_indexes[:max_images]):
    plt.subplot(1, max_images, plot_index + 1)
    plt.imshow(np.reshape(X_test[bad_index], (28, 28)))
    plt.title('Predicted: {}, Actual: {}'.format(predicted[bad_index], actual[bad_index]), fontsize=15)


In [None]:
def plot_roc_curve(y_train, y_pred, positive_label=9):
    """Plot a one-vs-rest ROC curve for a single class.

    Args:
        y_train: Ground-truth class labels (despite the name, any split may be
            passed).
        y_pred: Either hard predicted class labels (integer dtype) or
            continuous scores for the positive class (float dtype).
        positive_label: Class treated as the positive one (default 9, the
            former fixed value).

    Integer ``y_pred`` values are converted to a 0/1 indicator of
    ``positive_label`` before scoring. Feeding raw multi-class labels (0..9)
    to ``roc_curve`` would rank samples by digit value - e.g. a predicted 8
    would count as "closer to 9" than a predicted 0 - which is not a
    confidence score. Float (and boolean) inputs are passed through unchanged.
    """
    # np.asarray makes the comparison element-wise even for plain lists, where
    # `y_train == 9` would evaluate to the single value False.
    is_positive = (np.asarray(y_train) == positive_label)
    scores = np.asarray(y_pred)
    if scores.dtype.kind in 'iu':
        scores = (scores == positive_label).astype(int)

    fpr, tpr, _ = roc_curve(is_positive, scores)
    roc_auc = roc_auc_score(is_positive, scores)

    plt.figure(figsize=(8,4))
    plt.plot(fpr, tpr, label = 'ROC curve (area = {})'.format(roc_auc))
    plt.plot([0,1], [0,1], 'k--')  # chance-level diagonal
    plt.axis([0,1,0,1])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")

In [None]:
# Per-class sample counts of the training labels (kept for inspection; the
# count plot below derives the same numbers itself).
uniq_labels, uniq_counts = np.unique(y_train, return_counts=True)

# Preview the first training images.
show_images(x_train)

# Class distribution of the training labels.
plt.figure(figsize=(9,9))
# Pass the labels as `x=`: seaborn deprecated positional variables in 0.11 and
# treats the first positional argument as `data` from 0.12 on.
sns.countplot(x=y_train)
plt.ylabel('Count')
plt.xlabel('Labels')
plt.title('Training Data');  # trailing ';' hides the Text(...) repr in the cell output

In [None]:
# Prediction diagnostics for every (model, dataset variant) combination.
# Each combination has its own CSV named '<variant>_<model>.csv' whose 'labels'
# column is compared against y_test below.
list_of_models =  ['svm','logistic']
list_of_filenames = ['imbalanced','balanced','imbalanced_asym', 'balanced_asym','imbalanced_sym','balanced_sym']
for model_type in list_of_models:
  for noise_type in list_of_filenames:
    df = pd.read_csv('/content/drive/My Drive/CSE555/'+noise_type+'_'+model_type+'.csv')
    predicted_labels = df['labels']  # looked up once, reused by every plot below
    df_labels, df_counts = np.unique(predicted_labels, return_counts=True)
    display_bar_graph(df_labels, df_counts, 'Labels', 'Counts', model_type+' Predictions for '+noise_type+' Dataset')
    display_accuracy_score(y_test, predicted_labels)
    misclassified_images(y_test, predicted_labels, x_test)
    plot_roc_curve(y_test, predicted_labels)
    # Each iteration opens four figures (48 over the whole loop). Render and
    # release them now instead of keeping all of them open until the cell ends.
    plt.show()
    plt.close('all')