# CIFAR models evaluation

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score, roc_curve

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd

## Load CIFAR dataset

In [None]:
# Fetch three splits in a single call: the full training set plus the test
# set cut 40% / 60% (used below as validation and held-out test).
# NOTE: despite its name, `builder` holds the list of returned datasets.
builder, ds_info = tfds.load(
    name='cifar10',
    split=['train', 'test[:40%]', 'test[40%:]'],
    as_supervised=True,
    with_info=True,
)

In [None]:
# `builder` holds the three datasets in the order they were requested.
ds_train, ds_validation, ds_test = builder

# Report how many examples ended up in each split.
for split_name, split_ds in (('train', ds_train),
                             ('validation', ds_validation),
                             ('test', ds_test)):
    print(f'{split_name} size: {len(split_ds)}')

## Load models

In [None]:
from models import CIFAR_Detectition_Pre_trained_Model

# Wrap each saved model for evaluation. The first argument is the directory the
# model is read from; the 2-tuple is presumably the input image size the model
# expects (96x96 for the Xception variants, 32x32 for the plain CNN) —
# TODO confirm against models.py.
xception_model = CIFAR_Detectition_Pre_trained_Model('models/cifar_xception/', (96, 96))
xception_fine_model = CIFAR_Detectition_Pre_trained_Model('models/cifar_xception_fine/', (96, 96))
cnn_model = CIFAR_Detectition_Pre_trained_Model('models/cifar_cnn/', (32, 32))

**Compute predictions**

In [None]:
# Evaluate every model on the same held-out test split. Each call yields, in
# order: per-class scores (indexed later as [:, class]), the true labels, and
# the predicted class ids.
(
    y_pred_xception,
    y_test_xception,
    y_pred_classes_xception,
) = xception_model.compute_predictions(ds_test)

(
    y_pred_xception_fine,
    y_test_xception_fine,
    y_pred_classes_xception_fine,
) = xception_fine_model.compute_predictions(ds_test)

(
    y_pred_cnn,
    y_test_cnn,
    y_pred_classes_cnn,
) = cnn_model.compute_predictions(ds_test)

### Compute confusion matrix

In [None]:
# Human-readable class labels. The position in this list is used as the
# integer class id further down (tick labels, ROC positive label), so the order
# must match the dataset's label encoding — verify against
# ds_info.features['label'].names.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
# Build one confusion matrix per model (rows: true class, columns: predicted
# class, per tf.math.confusion_matrix).
confusion_mtx_xception = tf.math.confusion_matrix(
    labels=y_test_xception,
    predictions=y_pred_classes_xception,
)
confusion_mtx_xception_fine = tf.math.confusion_matrix(
    labels=y_test_xception_fine,
    predictions=y_pred_classes_xception_fine,
)
confusion_mtx_cnn = tf.math.confusion_matrix(
    labels=y_test_cnn,
    predictions=y_pred_classes_cnn,
)

In [None]:
# One heatmap per model, stacked vertically in the order
# CNN -> Xception -> fine-tuned Xception.
heatmap_specs = [
    (confusion_mtx_cnn,
     'Confusion matrix - CNN model'),
    (confusion_mtx_xception,
     'Confusion matrix - Xception model (without fine-tuning)'),
    (confusion_mtx_xception_fine,
     'Confusion matrix - Xception model (with fine-tuning)'),
]

fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(15, 15), sharex=True, sharey=True)

for ax, (matrix, title) in zip(axs, heatmap_specs):
    heatmap_ax = sns.heatmap(matrix, annot=True, fmt='g', ax=ax)
    # Replace the numeric ticks with class names on both axes, then tilt the
    # y labels so they fit next to the cells.
    heatmap_ax.set(xticklabels=class_names, yticklabels=class_names)
    heatmap_ax.set_yticklabels(class_names, rotation=45)
    heatmap_ax.set_title(title)

fig.savefig('confusion_matrix.png')

### Make a report

**Xception report**

In [None]:
# Per-class metrics for the Xception model, returned as a dict and turned into
# a table with one row per class / aggregate.
report = classification_report(
    y_true=y_test_xception,
    y_pred=y_pred_classes_xception,
    target_names=class_names,
    output_dict=True,
)
df_xception = pd.DataFrame(report).T
df_xception

In [None]:
# Persist the Xception report next to the notebook.
df_xception.to_csv(path_or_buf='report_xception.csv')

**Xception fine-tuning**

In [None]:
# Per-class metrics for the fine-tuned Xception model, returned as a dict and
# turned into a table with one row per class / aggregate.
report = classification_report(
    y_true=y_test_xception_fine,
    y_pred=y_pred_classes_xception_fine,
    target_names=class_names,
    output_dict=True,
)
df_xception_fine = pd.DataFrame(report).T
df_xception_fine

In [None]:
# Persist the fine-tuned Xception report next to the notebook.
df_xception_fine.to_csv(path_or_buf='report_xception_fine.csv')

**CNN model**

In [None]:
# Per-class metrics for the CNN model, returned as a dict and turned into a
# table with one row per class / aggregate.
report = classification_report(
    y_true=y_test_cnn,
    y_pred=y_pred_classes_cnn,
    target_names=class_names,
    output_dict=True,
)
df_cnn = pd.DataFrame(report).T
df_cnn

In [None]:
# Persist the CNN report next to the notebook.
df_cnn.to_csv(path_or_buf='report_cnn.csv')

### Comparative ROC-AUC curve for each class

In [None]:
def plot_ax(y_pred_xception, y_test_xception, y_pred_xception_fine, y_test_xception_fine, y_pred_cnn, y_test_cnn, ax, i, label):
    """Draw the one-vs-rest ROC curves of the three models for class ``i``.

    For each model, column ``i`` of its prediction array is used as the score
    and ``i`` as the positive label; every other class counts as negative.

    Args:
        y_pred_xception, y_pred_xception_fine, y_pred_cnn: 2-D per-class
            score arrays (indexed as ``[:, i]``), one per model.
        y_test_xception, y_test_xception_fine, y_test_cnn: true class ids
            matching the corresponding score array.
        ax: matplotlib axes to draw on.
        i: integer id of the class treated as positive.
        label: title for the subplot.
    """
    # (true labels, scores, line styling), listed in drawing order.
    model_curves = (
        (y_test_xception_fine, y_pred_xception_fine,
         {'color': 'blue', 'label': 'Xception model (fine-tuning)'}),
        (y_test_xception, y_pred_xception,
         {'linestyle': '--', 'color': 'orange', 'label': 'Xception model'}),
        (y_test_cnn, y_pred_cnn,
         {'color': 'green', 'label': 'CNN model'}),
    )

    # Compute every curve before drawing anything; thresholds are not needed.
    roc_points = [
        roc_curve(y_true, y_score[:, i], pos_label=i)[:2]
        for y_true, y_score, _ in model_curves
    ]

    for (fpr, tpr), (_, _, line_kwargs) in zip(roc_points, model_curves):
        ax.plot(fpr, tpr, **line_kwargs)

    ax.set_title(label)
    ax.legend(loc='best')
    


In [None]:
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(15, 15), sharex=True, sharey=True)

# axs.flat walks the 5x2 grid row by row, so class k lands on row k // 2,
# column k % 2.
for class_idx, (ax, label) in enumerate(zip(axs.flat, class_names)):
    plot_ax(y_pred_xception, y_test_xception,
            y_pred_xception_fine, y_test_xception_fine,
            y_pred_cnn, y_test_cnn,
            ax, class_idx, label)

fig.suptitle('AUC- ROC Curve One vs All')

# Invisible full-figure axes that only carries the shared x/y labels.
shared_axes = fig.add_subplot(111, frameon=False)
shared_axes.tick_params(labelcolor='none', which='both',
                        top=False, bottom=False, left=False, right=False)
shared_axes.set_xlabel('False Positive Rate')
shared_axes.set_ylabel('True Positive rate')

plt.tight_layout()
plt.show()

**ROC curves for the hard-to-predict classes (cat, deer, dog, frog)**

In [None]:
# The subset shown here is class_names[3:7] (cat, deer, dog, frog).
# plot_ax uses the class id both as the score column and as the positive
# label, so each subplot must receive the class's real id (3..6). Restarting
# the counter at 0 would draw the curves of classes 0..3 under these titles.
hard_class_ids = range(3, 7)

fig, axs = plt.subplots(2, 2, figsize=(15, 10), sharex=True, sharey=True)

# axs.flat visits the 2x2 grid row by row.
for ax, class_id in zip(axs.flat, hard_class_ids):
    plot_ax(y_pred_xception, y_test_xception,
            y_pred_xception_fine, y_test_xception_fine,
            y_pred_cnn, y_test_cnn,
            ax, class_id, class_names[class_id])

fig.suptitle('AUC- ROC Curve One vs All')

# Set common labels on an invisible axes spanning the whole figure.
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', which='both', top=False, bottom=False, left=False, right=False)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')


plt.tight_layout()
fig.savefig('auc_roc_curve.png')
plt.show()

## Cohen Kappa score

**CNN**

In [None]:
from sklearn.metrics import cohen_kappa_score

# Cohen's kappa between the true labels and the CNN's predicted classes.
cohen_kappa_score(y1=y_test_cnn, y2=y_pred_classes_cnn)

**Xception (without fine-tuning)**

In [None]:
# Cohen's kappa between the true labels and the Xception model's predicted classes.
cohen_kappa_score(y1=y_test_xception, y2=y_pred_classes_xception)

**Xception (with fine-tuning)**

In [None]:
# Cohen's kappa between the true labels and the fine-tuned Xception model's predicted classes.
cohen_kappa_score(y1=y_test_xception_fine, y2=y_pred_classes_xception_fine)