## ResNet50 - multi-class classification

## Import libraries

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# setup CUDA_VISIBLE DEVICES for titan.sci.utah.edu
# Expose only the GPU with index 1 to this process. The variable is set in its
# own cell, ahead of the cell that imports fastai.
# NOTE(review): presumably it has to be set before CUDA is first initialised
# to have any effect - confirm before moving this cell.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"


In [None]:
#Import libraries

from fastai.imports import *
from fastai.conv_learner import *
from fastai.transforms import *

import numpy as np
import pandas as pd
import seaborn as sns

## I/O and hyper parameters

In [None]:
# Parameters and hyper-parameters

# Root directory handed to fastai (ImageClassifierData.from_csv in get_data).
# "~" is expanded explicitly: Python's file functions do not perform
# shell-style tilde expansion, so a literal "~" would be treated as a
# directory name relative to the current working directory.
path = os.path.expanduser('~/Project_SEM/Project_TargetClass/Image_Classification')
# CSV files are resolved against the current working directory:
# - csv_all: input table read at the start of the notebook
# - csv_analysis: shuffled table with all columns (descriptive analysis)
# - csv_analysis_fastai: shuffled table reduced to the columns fastai needs
csv_all = os.path.join(os.getcwd(),'Dataset_ImageClassification_TargetClass_Filtered.csv')
csv_analysis = os.path.join(os.getcwd(),'Dataset_ImageClassification_TargetClass_Filtered_All.csv')
csv_analysis_fastai = os.path.join(os.getcwd(),'Dataset_ImageClassification_TargetClass_All_fastai.csv')
# Network architecture
arch = resnet50
# Image size
# rsz is only referenced by a commented-out data.resize call further down;
# sz is the size passed to get_data.
rsz = 400
sz = 224
# Batch size
bs = 16
# Default learning rate (reassigned later, after the learning-rate finder)
lr = 0.01

## Dataset creation for specific analysis

In [None]:
# Read csv file and create dataframe
df1 = pd.read_csv(csv_all, sep=',', parse_dates = ['AcquisitionDate'])
#df['TargetClass'] = df['TargetClass'].astype(str)

df1.shape

In [None]:
pd.value_counts(df1['TargetClass'])

In [None]:
# Filder dataset
#Filter_List = ['Filter1','Filter2,'Filter3']

# Create new dataframe
#df2 = df1[df1['TargetClass'].isin(Filter_List)]
df2 = df1
df2.shape

In [None]:
pd.value_counts(df2['TargetClass'])

In [None]:
df2.head()

In [None]:
# Shuffle data for DL analysis
# Sampling must be done WITHOUT replacement: with frac=1 this returns every
# row exactly once, in random order. With replace=True the call is a bootstrap
# resample instead: some rows are duplicated and others dropped, and copies of
# the same image can end up on both sides of the train/validation split.
df2 = df2.sample(frac=1, random_state = 1, replace = False)


In [None]:
# Save dataframe to CSV file (to be used for analysis)
df2.to_csv(csv_analysis, index=False, na_rep = 'NA')

In [None]:
# Save dataframe to CSV file (to be used for fastai - Deep learning)
# Keep only 2 columns: location, and dependent variable 'TargetClass'
df2 = df2[['Location','TargetClass']]
df2.to_csv(csv_analysis_fastai, index=False, na_rep = 'NA')

## Dataset overview

In [None]:
# Read csv file and create dataframe
df = pd.read_csv(csv_analysis, sep=',', parse_dates = ['AcquisitionDate'])

# Shuffle dataframe
#df = df.sample(frac=1, random_state = 1, replace = True)
df.shape

In [None]:
df.iloc[0]['Location']

In [None]:
df.head()

In [None]:
df.groupby(['TargetClass']).size()

In [None]:
# Generate bar graph
# Bar chart of the number of rows per TargetClass, ordered by class label
pd.value_counts(df['TargetClass']).sort_index().plot(kind='bar', title = 'TargetClass distribution')
# Keep a handle on the current figure so it can be saved before plt.show()
fig1 = plt.gcf()
plt.tight_layout()
plt.xticks(rotation='horizontal')
# File name is relative, so the PNG is written to the current working directory
fig1.savefig('BarGraph_Distribution_TargetClass-All.png')
plt.show()

In [None]:
# Row count per Magnification value
df.groupby(['Magnification']).size()

In [None]:
# Bar chart of the number of rows per Magnification value, ordered by value
pd.value_counts(df['Magnification']).sort_index().plot(kind='bar', title = 'Magnification distribution')


In [None]:
# Box plot of Magnification for each TargetClass; x labels rotated by 90 degrees
g = sns.catplot(x="TargetClass", y="Magnification", kind="box", legend=False, data=df);
g.set_xticklabels(rotation=90)
fig1 = plt.gcf()
fig1.savefig('Magnification_by_Category_BoxPlot-All.png', bbox_inches="tight")
plt.show()

In [None]:
# Swarm plot of Magnification for each TargetClass (same variables as the box plot)
g = sns.catplot(x="TargetClass", y="Magnification", data=df, kind="swarm");
g.set_xticklabels(rotation=90)
fig1 = plt.gcf()
plt.tight_layout()
# Extra bottom margin
# NOTE(review): presumably to leave room for the rotated tick labels - confirm
plt.gcf().subplots_adjust(bottom=0.15)
fig1.savefig('Magnification_by_Category_SwarmPlot-All.png', bbox_inches="tight")
plt.show()

## Define validation dataset

In [None]:
# Generate validation ids
n = len(df)
print("Data length:", n)
# 20% validation split
val_idxs = get_cv_idxs(n, val_pct=0.2, seed=0)
#print("val_idxs:",val_idxs)
print("Validation length:", len(val_idxs))

In [None]:
df_val = df.iloc[val_idxs,:]
df_val.groupby(['TargetClass']).size()

In [None]:
pd.value_counts(df_val['TargetClass']).sort_index().plot(kind='bar', title = 'TargetClass distribution - Validation dataset')

## Deep Learning analysis

In [None]:
# Data augmentation
# List of augmentation transforms; get_data passes it to tfms_from_model as aug_tfms.
# NOTE(review): the arguments are presumably a maximum rotation in degrees and
# the lighting (brightness / contrast) ranges - confirm against fastai.transforms.
transforms = [RandomRotate(5), RandomLighting(0.05, 0.05), RandomDihedral()]

In [None]:
def get_data(rsz):
    """Build the fastai data object for images of size `rsz`.

    The size argument is now actually used: previously the parameter was
    ignored and the module-level `sz` was read instead, so calling the
    function with any other size silently had no effect.

    Args:
        rsz: image size handed to `tfms_from_model`.

    Returns:
        The object returned by `ImageClassifierData.from_csv`, built from
        `csv_analysis_fastai` with the module-level `path`, `bs`, `arch`,
        `transforms` and `val_idxs`.
    """
    tfms = tfms_from_model(arch, rsz, aug_tfms=transforms, crop_type=CropType.RANDOM, max_zoom=1.0)
    # test_name='' means no test set is attached; num_workers=0 is passed through unchanged
    return ImageClassifierData.from_csv(path,'data', csv_analysis_fastai, bs=bs, tfms=tfms, val_idxs=val_idxs, suffix='', test_name='', skip_header=True, num_workers=0)


### Check data augmentation

In [None]:
data = get_data(sz)

In [None]:
#data = data.resize(rsz, 'tmp')

In [None]:
# Validation dataset
list_val = iter(data.val_dl)


In [None]:
x,y=next(list_val)
idx=0

fig,axes = plt.subplots(3,3, figsize=(12,12))
for i,ax in enumerate(axes.flat):
    ima=data.val_ds.denorm(x)[i]
    ax.set_title(data.classes[y[i]])
    ax.imshow(ima)

In [None]:
#Training dataset
list_trn = iter(data.trn_dl)

In [None]:
x,y=next(list_trn)
idx=0

fig,axes = plt.subplots(3,3, figsize=(12,12))
for i,ax in enumerate(axes.flat):
    ima=data.trn_ds.denorm(x)[i]
    ax.set_title(data.classes[y[i]])
    ax.imshow(ima)

### Network

In [None]:
# Main commands to load data and model
# Build the learner from the `arch` backbone and the `data` object.
# NOTE(review): ps is presumably the dropout probabilities of the added head
# layers, and pretrained=True presumably loads pretrained weights - confirm.
learn = ConvLearner.pretrained(arch, data, precompute=False, pretrained=True, ps=[0.25,0.5])


In [None]:
# Display the learner
learn

In [None]:
# Find automated learning rate
# Learning-rate finder with an explicit upper bound of 10
lrf = learn.lr_find(end_lr=10)

In [None]:
# Learning rate used at each step of the finder run
learn.sched.plot_lr()

In [None]:
# Finder result plot, with n_skip=2
# NOTE(review): presumably loss against learning rate, skipping the first 2 points - confirm
learn.sched.plot(n_skip=2)

In [None]:
# Learning rate chosen by hand; replaces the default lr = 0.01 from the parameter cell
lr = 5e-3

In [None]:
# First training phase (learn.unfreeze() is only called further down)
# NOTE(review): presumably 5 epochs at a constant learning rate - confirm
learn.fit(lr,5)

In [None]:
# NOTE(review): presumably 10 cycles of one epoch each with learning-rate restarts - confirm
learn.fit(lr,10, cycle_len=1)

In [None]:
learn.sched.plot_lr()

In [None]:
learn.sched.plot_loss()

In [None]:
# Checkpoint written before unfreeze() is called
learn.save('224_lastlayer_resnet50_TargetClass_All')

In [None]:
# Reload that checkpoint (lets the notebook be resumed from this cell)
learn.load('224_lastlayer_resnet50_TargetClass_All')

In [None]:
learn.unfreeze()
# Three learning rates: lr/9, lr/3 and lr.
# NOTE(review): presumably one per layer group - confirm.
# This array is not passed to any call before it is recomputed from the new
# lr in the fit cell below.
lrs=np.array([lr/9,lr/3,lr])

In [None]:
# Learning-rate finder again, after unfreeze()
lrf = learn.lr_find()

In [None]:
learn.sched.plot_lr()

In [None]:
learn.sched.plot()

In [None]:
# Lower learning rate for the second phase; lrs is rebuilt from it
lr=5e-4
lrs=np.array([lr/9,lr/3,lr])
# NOTE(review): presumably 5 cycles whose length doubles each time (cycle_mult=2) - confirm
learn.fit(lrs, 5, cycle_len=1, cycle_mult=2)

In [None]:
learn.sched.plot_lr()

In [None]:
learn.sched.plot_loss()

In [None]:
# Checkpoint written after the second training phase
learn.save('224_all_resnet50_TargetClass_All')

In [None]:
learn.load('224_all_resnet50_TargetClass_All')

## Inference

In [None]:
#Inference on validation data
log_preds,y = learn.TTA()
probs = np.mean(np.exp(log_preds),0)

accuracy_np(probs,y)

In [None]:
# Confusion matrix
from sklearn.metrics import confusion_matrix
from fastai.plots import *
y_pred = np.argmax(probs,1)
cm = confusion_matrix(y,y_pred)
plot_confusion_matrix(cm, data.classes)
fig1 = plt.gcf()
plt.tight_layout()
fig1.savefig('ConfusionMatrix_TargetClass_All.png')
plt.show()

In [None]:
from sklearn.metrics import f1_score
print(f1_score(y, y_pred, average=None))
print(f1_score(y, y_pred, average='micro'))
print(f1_score(y, y_pred, average='macro'))
print(f1_score(y, y_pred, average='weighted'))

In [None]:
from sklearn.metrics import classification_report
print(classification_report(y, y_pred, target_names=data.classes))

### Exploratory analysis - variable size / shape information

In [None]:
# Class names, in the order used for the label indices
data.classes

In [None]:
# Validation size: 340 images
y.shape

In [None]:
# Predictions shape:
# - dimension 1: 5 TTA images ( 1 main image + 4 augmented)
# - dimension 2: 340 validation images
# - dimension 3: 5 classes
log_preds.shape

In [None]:
# Shape of the first slice along dimension 1 of the list above
log_preds[0].shape

In [None]:
# Exponentiated log-predictions of that first slice
np.exp(log_preds[0])


In [None]:
# Mean over axis 0 of the first slice.
# NOTE(review): given the shape documented above, this averages over the
# validation images (one value per class), not over the TTA passes - confirm
# that this is what is wanted.
probs_0 = np.mean(np.exp(log_preds[0]),0)
probs_0

In [None]:
# Arg-max of the first row of probs, to compare with y[0] in the next cell
np.argmax(probs[0],0)

In [None]:
y[0]

### Quality Control

In [None]:
# Predicted class index per row of probs (arg-max along axis 1)
y_pred = np.argmax(probs,1)
y_pred

In [None]:
# Labels returned by learn.TTA(), for comparison with y_pred
y

In [None]:
# Number of element for Quality control
# Read by Retrieve_CorrectIndices / Retrieve_InCorrectIndices (sample size)
# and by plot_Clem (number of subplot columns)
Nb_elements = 4


In [None]:

def Retrieve_CorrectIndices(TrueLabel, PredictedLabel, ClassNb, nb_elements=None):
    """Pick random positions of samples of class `ClassNb` that were predicted correctly.

    Args:
        TrueLabel: 1-D sequence of true class indices.
        PredictedLabel: 1-D sequence of predicted class indices, same length.
        ClassNb: class index of interest.
        nb_elements: maximum number of positions to return. Defaults to the
            module-level `Nb_elements`.

    Returns:
        1-D integer array of distinct positions where the true label equals
        both `ClassNb` and the prediction. It holds `nb_elements` positions
        when that many exist, otherwise all of them (possibly none).
    """
    if nb_elements is None:
        nb_elements = Nb_elements

    true_arr = np.asarray(TrueLabel)
    pred_arr = np.asarray(PredictedLabel)

    # Correct prediction AND true label is the class of interest
    mask = np.logical_and(np.equal(true_arr, pred_arr), np.equal(true_arr, ClassNb))
    candidates = np.nonzero(mask)[0]

    # No match: return the empty index array instead of sampling from nothing
    if candidates.size == 0:
        return candidates

    # Sampling without replacement cannot draw more items than are available,
    # so the sample size is clamped to the number of candidates
    n_pick = min(nb_elements, candidates.size)
    return np.random.choice(candidates, n_pick, replace=False)
    
    
    

In [None]:
# Quick check: random sample of correctly predicted positions for class index 0
Retrieve_CorrectIndices(y,y_pred,0)

In [None]:
def Retrieve_InCorrectIndices(TrueLabel, PredictedLabel, ClassNb, nb_elements=None):
    """Pick random positions of samples of class `ClassNb` that were predicted incorrectly.

    Args:
        TrueLabel: 1-D sequence of true class indices.
        PredictedLabel: 1-D sequence of predicted class indices, same length.
        ClassNb: class index of interest (compared with the TRUE label).
        nb_elements: maximum number of positions to return. Defaults to the
            module-level `Nb_elements`.

    Returns:
        1-D integer array of distinct positions where the true label equals
        `ClassNb` but the prediction differs. It holds `nb_elements`
        positions when that many exist, otherwise all of them (possibly none).
    """
    if nb_elements is None:
        nb_elements = Nb_elements

    true_arr = np.asarray(TrueLabel)
    pred_arr = np.asarray(PredictedLabel)

    # Wrong prediction AND true label is the class of interest
    mask = np.logical_and(np.not_equal(true_arr, pred_arr), np.equal(true_arr, ClassNb))
    candidates = np.nonzero(mask)[0]

    # No misclassified sample: return the empty index array instead of
    # sampling from nothing
    if candidates.size == 0:
        return candidates

    # Sampling without replacement cannot draw more items than are available,
    # so the sample size is clamped to the number of candidates
    n_pick = min(nb_elements, candidates.size)
    return np.random.choice(candidates, n_pick, replace=False)

In [None]:
# Quick check: random sample of misclassified positions whose true class index is 0
Retrieve_InCorrectIndices(y,y_pred,0)

In [None]:
# Inputs: idxs - positions in the validation predictions (as returned by
# Retrieve_CorrectIndices / Retrieve_InCorrectIndices), plot title
def plot_Clem(idxs, title):
    """Show the validation images at positions `idxs` in one row.

    Each image is titled with its predicted class name. The file of each image
    is taken from the validation dataset itself (`data.val_ds.fnames`) rather
    than from `df`: `idxs` are positions in the validation predictions, while
    `df` holds every row (training and validation), so using them as row
    labels of `df` displays unrelated images.

    Args:
        idxs: sequence of integer positions into `y_pred`.
        title: text printed above the figure.

    Returns:
        None. Prints `title` and draws a matplotlib figure; draws nothing
        when `idxs` is empty.
    """
    print(title)
    n_images = len(idxs)
    if n_images == 0:
        # Nothing to display for this class
        return

    # Keep the usual Nb_elements-wide layout, widened if more indices are given.
    # squeeze=False guarantees a 2-D axes array even for a single column.
    n_cols = max(Nb_elements, n_images)
    fig,axes = plt.subplots(1,n_cols, figsize=(20,20), squeeze=False)
    panels = list(axes.flat)

    for ax, idx in zip(panels, idxs):
        # NOTE(review): relies on data.val_ds.fnames being in the same order as
        # the validation predictions - confirm against the fastai version in use
        ima=open_image(os.path.join(data.path, data.val_ds.fnames[idx]))
        ax.set_title(data.classes[y_pred[idx]])
        ax.imshow(ima)

    # Hide the panels left empty when fewer images than columns are available
    for ax in panels[n_images:]:
        ax.axis('off')

In [None]:
# Quality-control figures: for a class index, draw a random sample of
# validation images of that class that were classified correctly / incorrectly.
# Each cell draws a fresh random sample, so re-running a cell may show
# different images.
ClassNb = 0
title = 'Correctly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_CorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 0
title = 'Incorrectly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_InCorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 1
title = 'Correctly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_CorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 1
title = 'Incorrectly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_InCorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 2
title = 'Correctly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_CorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 2
title = 'Incorrectly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_InCorrectIndices(y,y_pred,ClassNb), title)

In [None]:
# For class indices 3 and 4 only the correctly classified samples are plotted
# in the cells shown here
ClassNb = 3
title = 'Correctly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_CorrectIndices(y,y_pred,ClassNb), title)

In [None]:
ClassNb = 4
title = 'Correctly classified - Class ' + data.classes[ClassNb]
plot_Clem(Retrieve_CorrectIndices(y,y_pred,ClassNb), title)