 #  fastai-v1  inference on Test data - experiment

## Import libraries

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Set CUDA_VISIBLE_DEVICES to "0" (setup for titan.sci.utah.edu).
# NOTE(review): presumably this has to run before the fastai import in the next
# cell so the setting is in place when CUDA is initialised - confirm.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


In [None]:
#Import libraries - fastai_v1

from fastai.vision import *
from fastai.metrics import error_rate


import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels


## I/O and hyper parameters

In [None]:
# Parameters and hyper-parameters

# --- Input -------------------------------------------------------------------
# CSV file listing the test dataset only (synthetic data)
csv_test_FileName = 'Dataset_TargetClass_Overlap-9Blocks_25000xOnly_shuffled_fastai-v1_test.csv'
csv_test = os.path.join('../CSV_InputFiles_TargetClass', csv_test_FileName)

# --- Outputs (both written to the current working directory) -----------------
# Per-image predictions
csv_result = os.path.join(
    os.getcwd(),
    'Dataset_TargetClass_Overlap-9Blocks_25000xOnly_shuffled_fastai-v1_test-Prediction.csv',
)
# Predictions aggregated by majority voting
csv_result_MajVoting = os.path.join(
    os.getcwd(),
    'Dataset_TargetClass_Overlap-9Blocks_25000xOnly_shuffled_fastai-v1_test-Prediction_MajVoting.csv',
)

# --- Network -----------------------------------------------------------------
# Folder and file name of the exported learner
model_path = os.path.join(os.getcwd(), 'models')
model_file = 'TargetClass_fastai-v1_224_all_resnet50.pkl'

# NOTE(review): arch, sz, bs and lr are not referenced by any other cell
# visible in this notebook.
arch = models.resnet50  # network architecture
sz = 224                # image size
bs = 32                 # batch size
lr = 0.01               # default learning rate

## Define Test dataset

In [None]:
# Read the test CSV file into a dataframe and preview the first rows
df_test = pd.read_csv(csv_test, sep=',')
df_test.head()

In [None]:
# (number of rows, number of columns) of the test dataframe
df_test.shape

In [None]:
# Number of test rows per value of the 'Label' column
df_test.groupby(['Label']).size()

In [None]:
# Same per-label counts, kept in a variable
df_test_size = df_test.groupby(['Label']).size()
#df_test_size = df_test_size.reindex(classes_Labels_ordered)
df_test_size


In [None]:
# Generate bar graph
# pd.value_counts(df_test['Label']).sort_index().plot(kind='bar', title = 'Starting Material - test dataset')
# fig1 = plt.gcf()
# plt.tight_layout()
# fig1.savefig('BarGraph_Distribution_StartingMaterial_TestData.png')
# plt.show()

In [None]:
# Bar chart of the number of test rows per 'Label' value.
# The seaborn style is selected BEFORE the axes are created: style settings are
# picked up when axes are built, so setting them afterwards leaves the already
# created axes in the previous style.
sns.set(style="whitegrid")
fig, ax = plt.subplots(1,1,figsize=(5,5))
# Draw explicitly on the axes created above instead of relying on the
# "current axes" state.
sns_plot = sns.countplot(x="Label", data=df_test, ax=ax)
sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=90)
plt.tight_layout()
# Write the file before displaying, so the saved image does not depend on what
# the backend does with the figure once it has been shown.
fig = sns_plot.get_figure()
fig.savefig("BarGraph_Distribution_TargetClass_TestData.png")
plt.show()

## Deep Learning analysis

## Inference - Test dataset - without TTA

In [None]:
# Build the list of test images from the test CSV.
# NOTE(review): the arguments here are os.getcwd() and csv_test_FileName, whereas
# df_test was read from csv_test ('../CSV_InputFiles_TargetClass/...') - confirm
# that both refer to the same file.
test = ImageList.from_csv(os.getcwd(), csv_test_FileName, folder='../Data_TargetClass')


In [None]:
# Display the image list
test


In [None]:
# Main commands to load data and model:
# load the exported learner model_file from model_path with `test` attached as test set
learn = load_learner(model_path,model_file, test=test)

In [None]:
# Display the loaded learner
learn

In [None]:
# Run the learner on the test set. The first returned value holds the model
# outputs per test item, the second one is discarded, the third one the losses.
# NOTE(review): the test items were attached without labels, so it is unclear
# what the losses are computed against - confirm before relying on them.
y_pred_test, _, losses = learn.get_preds(ds_type=DatasetType.Test,with_loss=True)


In [None]:
# Name of the highest-scoring class for every test item: take the argmax of
# each output row in one call, then look the index up in the learner's class list.
y_pred_test_classes = [learn.data.classes[class_idx]
                       for class_idx in y_pred_test.numpy().argmax(axis=1)]


In [None]:
# Sanity check on the first test item
print(y_pred_test[0])                    # raw model output (tensor)
print(y_pred_test[0].numpy())            # same values as a numpy array
print(np.sum(y_pred_test[0].numpy()))    # sum over classes (presumably ~1 if these are probabilities - confirm)
print(np.argmax(y_pred_test[0]))         # index of the highest-scoring class
print(y_pred_test_classes[0])            # name of that class
print(losses[0])                         # loss value returned for this item

In [None]:
# Predicted class names of the first ten test items
print(y_pred_test_classes[:10])

In [None]:
# Keep only the last four '/'-separated components of every test item path
# (earlier variant kept just the file name):
#FileNames = [i.split('/', -1)[-1] for i in learn.data.test_ds.items]
FileNames = [
    '/'.join(item_path.split('/')[-4:])
    for item_path in learn.data.test_ds.items
]
print(FileNames[:10])

In [None]:
# Create dataframe for prediction on test data: one row per test item with its
# shortened path ('File') and predicted class name ('Prediction')
df_preds_test = pd.DataFrame({'File':FileNames, 'Prediction':y_pred_test_classes})
df_preds_test.head()

In [None]:
# (number of rows, number of columns) of the prediction dataframe
df_preds_test.shape

In [None]:
# Attach the predictions to the test dataframe. Left join on 'File': every
# df_test row is kept; a row without a matching 'File' in df_preds_test gets a
# missing 'Prediction'.
# NOTE(review): a missing 'Prediction' is not a class name, so the class-index
# lookup further down would presumably fail on it - compare result.shape with
# df_test.shape to make sure the 'File' values match one-to-one.
result = df_test.merge(df_preds_test,on='File',how='left')
result.shape


In [None]:
result.head()

In [None]:
# Save results as CSV file (missing values are written as 'NA')
result.to_csv(csv_result, index=False, na_rep = 'NA')

In [None]:
# Class names known to the learner
learn.data.classes

In [None]:
# Generate proper arrays: class names of ground truth and prediction, row by row
List_TrueClass_test = result['Label'].tolist()
List_PredClass_test = result['Prediction'].tolist()

# Back to class_nb: position of each class name in learn.data.classes.
# The lookup index is built once instead of once per row; get_loc still raises
# KeyError for a name that is not one of the learner's classes.
_class_index = pd.Index(learn.data.classes)
List_TrueValue_test = [_class_index.get_loc(x) for x in List_TrueClass_test]
List_PredValue_test = [_class_index.get_loc(x) for x in List_PredClass_test]

In [None]:
# First ten ground-truth / predicted class names
print(List_TrueClass_test[:10])
print(List_PredClass_test[:10])

In [None]:
# The same ten items as class indices
print(List_TrueValue_test[:10])
print(List_PredValue_test[:10])

In [None]:
from sklearn.utils.multiclass import unique_labels

# Class indices occurring in the ground truth, in the predictions, and in
# either of them (the combined set is printed twice, as before).
a = unique_labels(List_TrueValue_test, List_PredValue_test)
print(unique_labels(List_TrueValue_test))
print(unique_labels(List_PredValue_test))
print(unique_labels(List_TrueValue_test, List_PredValue_test))
print(a)

# Class names belonging to the combined set of indices
b = [learn.data.classes[class_idx] for class_idx in a]
print(b)
#classes = learn.data.classes[unique_labels(List_TrueValue_test, List_PredValue_test)]
#print(classes)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted class equals the ground truth.
# Arguments follow scikit-learn's (y_true, y_pred) order, like the other metric
# calls in this notebook; the value is the same either way.
accuracy_score(List_TrueValue_test, List_PredValue_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Raw confusion matrix on class names; rows and columns follow the order of
# learn.data.classes.
confusion_matrix(
    y_true=List_TrueClass_test,
    y_pred=List_PredClass_test,
    labels=learn.data.classes,
)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

def _aligned_labels(y_true, y_pred, classes):
    """Return ``(labels, tick_labels)`` so matrix rows/columns match the ticks.

    ``labels`` is what gets passed to ``confusion_matrix`` (``None`` means
    "let scikit-learn use the labels observed in the data").
    """
    observed = unique_labels(y_true, y_pred)
    n_classes = len(classes)
    # Labels are integer positions into `classes`: use every position, so a
    # class missing from the data still gets its (all-zero) row and column.
    if (observed.size and observed.dtype.kind in 'iu'
            and observed.min() >= 0 and observed.max() < n_classes):
        return np.arange(n_classes), classes
    # Labels are the class names themselves: order the matrix like `classes`.
    if observed.size and set(observed.tolist()) <= set(classes):
        return classes, classes
    # Labels cannot be related to `classes`: keep the observed labels, and only
    # use `classes` as tick text when the counts agree.
    if len(observed) == n_classes:
        return None, classes
    return None, [str(label) for label in observed]


def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Plot the confusion matrix of `y_pred` against `y_true` as an annotated heat map.

    Parameters
    ----------
    y_true, y_pred : ground-truth and predicted labels, either as integer
        positions into `classes` or as the class names themselves.
    classes : class names, used as tick labels on both axes.
    normalize : if True, every row is divided by its sum; rows without any
        true sample are left at zero.
    title : plot title; a default depending on `normalize` is used when empty.
    cmap : matplotlib colormap of the heat map.

    Returns
    -------
    The matplotlib axes containing the plot.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    classes = list(classes)

    # Compute the confusion matrix over the full class list, not only over the
    # labels that happen to occur, so that it always lines up with the ticks.
    labels, tick_labels = _aligned_labels(y_true, y_pred, classes)
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class without true samples has an all-zero row; dividing by 1
        # keeps it at zero instead of producing NaN from 0/0.
        row_totals[row_totals == 0] = 1
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix')

    fig, ax = plt.subplots(1,1,figsize=(8,6))
    ax.grid(False, which='major')
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=tick_labels, yticklabels=tick_labels,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=90, ha="right",
             rotation_mode="anchor")

    # Annotate every cell with its value; white text on dark cells, black on
    # light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    return ax



In [None]:
# Plot the per-image confusion matrix and write it to a PNG file.
ax = plot_confusion_matrix(
    List_TrueValue_test,
    List_PredValue_test,
    learn.data.classes,
    title='Confusion Matrix - Test data',
)
# The figure was just created by plot_confusion_matrix, so it is the one the
# returned axes belong to.
ax.figure.tight_layout()
ax.figure.savefig('ConfusionMatrix_TestData.png')

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test rows.
# `labels` lists every class index, so `target_names` always has the matching
# length - without it scikit-learn rejects the call as soon as one class occurs
# neither in the ground truth nor in the predictions.
print(classification_report(List_TrueValue_test, List_PredValue_test,
                            labels=list(range(len(learn.data.classes))),
                            target_names=learn.data.classes, digits=4))


## Majority Voting


In [None]:
# Ground truth per acquisition: the most frequent 'Label' among the rows that
# share an 'Acquisition' value. Series.mode() lists the modes in sorted order,
# so [0] picks the first of them when several labels are tied.
result_TrueLabel_MajVoting = (
    result
    .groupby(['Acquisition'])['Label']
    .apply(lambda labels: labels.mode()[0])
    .reset_index(name='TrueLabel_MajVoting')
)
result_TrueLabel_MajVoting.head(15)

In [None]:
#result.groupby(['Acquisition'])['Prediction'].apply(lambda x: x.mode())

In [None]:
# Majority vote per acquisition: the most frequent 'Prediction' among the rows
# that share an 'Acquisition' value. Series.mode() lists the modes in sorted
# order, so [0] picks the first of them when several predictions are tied.
result_PredLabel_MajVoting = (
    result
    .groupby(['Acquisition'])['Prediction']
    .apply(lambda predictions: predictions.mode()[0])
    .reset_index(name='PredLabel_MajVoting')
)
result_PredLabel_MajVoting.head(15)

In [None]:
# Put ground truth and voted prediction side by side, matched on 'Acquisition'
# (joining on the key rather than on row position guards against a different row order).
result_MajVoting = result_TrueLabel_MajVoting.merge(
    result_PredLabel_MajVoting,
    how='left',
    on='Acquisition',
)
result_MajVoting.head()


In [None]:
# Save the majority-voting results as CSV file (missing values are written as 'NA')
result_MajVoting.to_csv(csv_result_MajVoting, index=False, na_rep = 'NA')

In [None]:
# (number of acquisitions, number of columns)
result_MajVoting.shape

In [None]:
# Generate proper arrays: class names of ground truth and voted prediction,
# one entry per acquisition
List_TrueClass_MajVoting_test = result_MajVoting['TrueLabel_MajVoting'].tolist()
List_PredClass_MajVoting_test = result_MajVoting['PredLabel_MajVoting'].tolist()

# Back to class_nb: position of each class name in learn.data.classes.
# The lookup index is built once instead of once per row; get_loc still raises
# KeyError for a name that is not one of the learner's classes.
_class_index_MajVoting = pd.Index(learn.data.classes)
List_TrueValue_MajVoting_test = [_class_index_MajVoting.get_loc(x) for x in List_TrueClass_MajVoting_test]
List_PredValue_MajVoting_test = [_class_index_MajVoting.get_loc(x) for x in List_PredClass_MajVoting_test]

In [None]:
# Fraction of acquisitions whose voted prediction equals the ground truth.
# Arguments follow scikit-learn's (y_true, y_pred) order, like the other metric
# calls in this notebook; the value is the same either way.
accuracy_score(List_TrueValue_MajVoting_test, List_PredValue_MajVoting_test)

In [None]:
# Raw confusion matrix after majority voting, on class names; rows and columns
# follow the order of learn.data.classes.
confusion_matrix(
    y_true=List_TrueClass_MajVoting_test,
    y_pred=List_PredClass_MajVoting_test,
    labels=learn.data.classes,
)

In [None]:
# Per-class precision / recall / F1 after majority voting.
# `labels` lists every class index, so `target_names` always has the matching
# length - without it scikit-learn rejects the call as soon as one class occurs
# neither in the ground truth nor in the voted predictions.
print(classification_report(List_TrueValue_MajVoting_test, List_PredValue_MajVoting_test,
                            labels=list(range(len(learn.data.classes))),
                            target_names=learn.data.classes, digits=4))


In [None]:
# Total count over all cells of the majority-voting confusion matrix
# (to be compared with the number of acquisitions).
cm = confusion_matrix(
    y_true=List_TrueClass_MajVoting_test,
    y_pred=List_PredClass_MajVoting_test,
    labels=learn.data.classes,
)
cm.sum()

In [None]:
# Plot the majority-voting confusion matrix and write it to a PNG file.
ax = plot_confusion_matrix(
    List_TrueValue_MajVoting_test,
    List_PredValue_MajVoting_test,
    learn.data.classes,
    title='Confusion Matrix - Test data - MajVoting',
)
# The figure was just created by plot_confusion_matrix, so it is the one the
# returned axes belong to.
ax.figure.tight_layout()
ax.figure.savefig('ConfusionMatrix_TestData-MajVoting.png')