 #  Generate confusion matrices on validation data from uncertainty assessment
 

## Import libraries

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting.
# `autoreload 2` re-imports edited modules before each cell executes;
# `matplotlib inline` renders figures directly below the cell that creates them.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Set up CUDA_VISIBLE_DEVICES for titan.sci.utah.edu
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [None]:
#Import libraries - fastai_v1

from fastai.vision import *
from fastai.metrics import error_rate

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.utils.multiclass import unique_labels

from fastai.basic_train import DatasetType
from fastai.torch_core import to_np
import torch


## I/O and hyper parameters

In [None]:
# Parameters and hyper-parameters


# Explicit import: no active `import os` is visible in this notebook, so this
# cell presumably relied on `os` leaking in through `from fastai.vision import *`.
import os

# Stem shared by the validation CSV and every prediction file derived from it
dataset_stem = 'Dataset_TargetClass_Overlap-9Blocks_25000xOnly_shuffled_fastai-v1_val'

# CSV file contains validation dataset only (synthetic data)
csv_val_FileName = dataset_stem + '.csv'
csv_val = os.path.join('../CSV_InputFiles_TargetClass', csv_val_FileName)

# Prediction CSVs for that validation set, resolved against the current
# working directory (csv_result_Uncertainty is the one loaded further down)
csv_result = os.path.join(os.getcwd(), dataset_stem + '-Prediction.csv')
csv_result_Uncertainty = os.path.join(os.getcwd(), dataset_stem + '-PredictionWithUncertainty.csv')

csv_result_MajVoting = os.path.join(os.getcwd(), dataset_stem + '-PredictionWithUncertainty_MajVoting.csv')
#csv_result_Top2_MajVoting = os.path.join(os.getcwd(),'Dataset_MixedMaterials_ImageClassification_oversample_shuffled_fastai-v1_val-PredictionTop2_MajVoting.csv')



## Define dataset

In [None]:
# Load the validation CSV (comma is read_csv's default separator) and preview the first rows
df_val = pd.read_csv(csv_val)
df_val.head(5)

In [None]:
# Load the predictions-with-uncertainty CSV (comma is read_csv's default separator)
# and preview the first rows
df_preds_val_U = pd.read_csv(csv_result_Uncertainty)
df_preds_val_U.head(5)

In [None]:
# Sanity check: (rows, columns) of the loaded prediction table
df_preds_val_U.shape

In [None]:
# # Generate ground truth - StartingMaterial
# df_preds_val_U['Label'] = df_preds_val_U['File'].apply(lambda x: x.split('/', -1)[0])
# df_preds_val_U.head()

In [None]:
# Alias, not a copy: `result` and `df_preds_val_U` refer to the same DataFrame
result = df_preds_val_U

In [None]:
# Sanity check: (rows, columns) of the table used for all matrices below
result.shape

In [None]:
# Bar chart counting the rows of `result` per value of the "Label" column
sns.set(style="whitegrid")
sns_plot = sns.countplot(x="Label", data=result)
# Re-apply the existing tick labels, rotated by 90 degrees
sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=90)
# Keep a handle on the figure for the (currently disabled) export below
fig = sns_plot.get_figure()
#fig.savefig("BarGraph_Distribution_StartingMaterial_Val.png")

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Compute the confusion matrix of `y_pred` against `y_true` and draw it
    as an annotated heat map.

    Parameters
    ----------
    y_true, y_pred : sequences of labels, forwarded unchanged to
        `sklearn.metrics.confusion_matrix`.
    classes : sequence of tick labels used on both axes.
    normalize : if True, every row is divided by its row sum. Rows whose
        sum is zero are left at 0 rather than becoming NaN.
    title : figure title; when empty a default depending on `normalize`
        is used.
    cmap : colormap handed to `imshow`.

    Returns
    -------
    The matplotlib Axes holding the plot.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    # Compute confusion matrix
    # NOTE(review): no `labels=` is passed, so a class missing from both
    # y_true and y_pred yields a matrix smaller than `classes` -- confirm
    # that the inputs always cover every class.
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row has samples; untouched cells keep the
        # 0 they were initialised with, so no NaN reaches the colour
        # threshold or the cell annotations.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix')

    _, ax = plt.subplots(1, 1, figsize=(8, 6))
    # Grid lines would cut through the heat-map cells
    ax.grid(False, which='major')
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=90, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; text turns white on cells darker than half the
    # maximum so it stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    return ax



In [None]:
def plot_matrix(matrix, classes, title=None, cmap=plt.cm.Blues):
    """
    Draw an already-computed 2-D matrix as an annotated heat map.

    `matrix` is shown as-is (rows on the y axis labelled 'True label',
    columns on the x axis labelled 'Predicted label'), `classes` provides
    the tick labels for both axes, `title` defaults to 'Average matrix'
    and `cmap` is the colormap given to `imshow`. Every cell is annotated
    with its value to three decimals. Returns the matplotlib Axes.
    """
    title = title or 'Average matrix'
    print('Average matrix')

    _, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid(False)

    heatmap = ax.imshow(matrix, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(heatmap, ax=ax)

    n_rows, n_cols = matrix.shape[0], matrix.shape[1]
    # One tick per row/column, labelled with the class names
    axis_settings = dict(
        xticks=np.arange(n_cols),
        yticks=np.arange(n_rows),
        xticklabels=classes, yticklabels=classes,
        title=title,
        ylabel='True label',
        xlabel='Predicted label',
    )
    ax.set(**axis_settings)

    # Vertical x tick labels, anchored at their right end
    plt.setp(ax.get_xticklabels(), rotation=90, ha="right",
             rotation_mode="anchor")

    # Cell annotations: white text above the cutoff, black below.
    # nanmax is used so NaN cells do not poison the cutoff.
    cutoff = np.nanmax(matrix) / 1.5
    for row, col in np.ndindex(n_rows, n_cols):
        cell = matrix[row, col]
        ax.text(col, row, format(cell, '.3f'), fontsize=10,
                ha="center", va="center",
                color="white" if cell > cutoff else "black")
    return ax



## Generate matrices on Real data

In [None]:
# Pull the columns needed for the matrices out of the prediction table
List_TrueClass_val = result['Label'].tolist()
List_PredClass_val = result['Prediction'].tolist()
List_Values_Entropy = result['Entropy'].tolist()
List_Values_PredMean = result['Pred_Mean'].tolist()

# Class names; a name's position in this list is its integer class index
classes = [
    'Class1',
    'Class2',
    'Class3',
    'Class4',
    'Class5',
]

# Map class names back to integer class indices. The lookup index is built
# once rather than once per row; get_loc still raises KeyError for a name
# that is not in `classes`.
class_index = pd.Index(classes)
List_TrueValue_val = [class_index.get_loc(x) for x in List_TrueClass_val]
List_PredValue_val = [class_index.get_loc(x) for x in List_PredClass_val]

In [None]:
# First ten ground-truth class names and their integer class indices
tmp_List_TrueClass_val = List_TrueClass_val[:10]
tmp_List_TrueValue_val = list(map(pd.Index(classes).get_loc, tmp_List_TrueClass_val))

In [None]:
# Show the sampled class names and, on the next line, their integer indices
print(tmp_List_TrueClass_val, tmp_List_TrueValue_val, sep='\n')

In [None]:
# Draw the confusion matrix, then tighten and export the figure that owns the returned Axes
ax = plot_confusion_matrix(
    List_TrueValue_val,
    List_PredValue_val,
    classes,
    title='Confusion Matrix - Validation data',
)
ax.figure.tight_layout()
ax.figure.savefig('ConfusionMatrix-Uncertainty_ValData.png')

In [None]:
# Generate average matrix (each cell displays the average value for one true / predicted class pair)

def generate_averagematrix(actual, predicted, values, labels):
    """
    Average `values` per (actual class, predicted class) cell.

    Parameters
    ----------
    actual, predicted : sequences of integer class indices, used as the
        row and column index respectively.
    values : sequence of numbers, one per sample.
    labels : sequence whose length gives the matrix size.

    Returns
    -------
    A float array of shape (len(labels), len(labels)). Cell [a][p] holds
    the mean of the values of all samples with actual class `a` and
    predicted class `p`; cells that received no sample are NaN.
    """
    size = len(labels)
    counts = np.zeros((size, size))
    totals = np.zeros((size, size))
    # zip stops at the shortest input, as before
    for a, p, v in zip(actual, predicted, values):
        counts[a][p] += 1
        totals[a][p] += v

    # Divide only the populated cells; the rest keep the NaN they were
    # initialised with, so no 0/0 is evaluated and no RuntimeWarning fires.
    am = np.full((size, size), np.nan)
    np.divide(totals, counts, out=am, where=counts > 0)
    return am

In [None]:
# Build both average matrices (entropy first, then mean prediction) over the
# same true / predicted class indices
am_Entropy, am_PredMean = (
    generate_averagematrix(List_TrueValue_val, List_PredValue_val, cell_values, classes)
    for cell_values in (List_Values_Entropy, List_Values_PredMean)
)

In [None]:
# Draw the average-entropy matrix, then tighten and export the figure that owns the returned Axes
ax = plot_matrix(
    am_Entropy,
    classes,
    title='Matrix - Average entropy',
)
ax.figure.tight_layout()
ax.figure.savefig('Matrix_AverageEntropy_ValData.png')


In [None]:
# Draw the average mean-prediction matrix, then tighten and export the figure that owns the returned Axes
ax = plot_matrix(
    am_PredMean,
    classes,
    title='Matrix - Average Pred',
)
ax.figure.tight_layout()
ax.figure.savefig('Matrix_AveragePredMean_ValData.png')


In [None]:
# Summary of am_PredMean: number of columns, NaN-ignoring mean over all
# cells, and the diagonal sum divided by the number of columns.
# NOTE(review): unlike nanmean, the trace propagates NaN, so a single empty
# diagonal cell makes the last figure NaN -- confirm this is intended.
Nb_Diag = am_PredMean.shape[1]
am_PredMean_Avg = np.nanmean(am_PredMean)
am_PredMean_Avg_Trace = np.trace(am_PredMean) / Nb_Diag
print(' '.join(str(item) for item in (
    Nb_Diag,
    round(am_PredMean_Avg, 4),
    round(am_PredMean_Avg_Trace, 4),
)))

In [None]:
# Summary of am_Entropy: number of columns, NaN-ignoring mean over all
# cells, and the diagonal sum divided by the number of columns.
# NOTE(review): unlike nanmean, the trace propagates NaN, so a single empty
# diagonal cell makes the last figure NaN -- confirm this is intended.
Nb_Diag = am_Entropy.shape[1]
am_Entropy_Avg = np.nanmean(am_Entropy)
am_Entropy_Avg_Trace = np.trace(am_Entropy) / Nb_Diag
print(' '.join(str(item) for item in (
    Nb_Diag,
    round(am_Entropy_Avg, 4),
    round(am_Entropy_Avg_Trace, 4),
)))