## Classification - 25000x only - ResNet50

## Import libraries

In [None]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting.
# - autoreload (mode 2): re-import edited modules before each cell execution
# - matplotlib inline: render figures directly below the cell that creates them
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Set CUDA_VISIBLE_DEVICES for titan.sci.utah.edu so that only device "0" is visible.
# NOTE(review): presumably this has to run before the fastai import cell — confirm.
import os

os.environ.update(CUDA_VISIBLE_DEVICES="0")


In [None]:
# Import libraries - fastai v1
# NOTE(review): names such as `plt`, `models`, `ImageList`, `get_transforms`,
# `cnn_learner`, `accuracy` and `imagenet_stats` are used later in this notebook
# without an explicit import, so they are presumably supplied by the star
# import below — confirm.

from fastai.vision import *
from fastai.metrics import error_rate


import numpy as np
import pandas as pd
import seaborn as sns

## I/O and hyperparameters

In [None]:
# Parameters and hyper-parameters

# Root data directory. Python file APIs do not expand "~" by themselves, so the
# home-directory prefix is resolved here to keep `path` usable with open(),
# os.path and pathlib.
path = os.path.expanduser('~/Project_SEM/Project_Uncertainty/Analysis_TargetClass/Data_TargetClass')
# CSV file describing the dataset, addressed relative to the working directory
csv_FileName = 'Dataset_TargetClass_Overlap-9Blocks_25000xOnly_shuffled_fastai-v1.csv'
csv = os.path.join('../CSV_InputFiles_TargetClass', csv_FileName)
# Network architecture
arch = models.resnet50
# Image sizes: `sz` is the size passed to the transforms further down;
# `rsz` is only referenced by commented-out code in this notebook.
rsz = 400
sz = 224
# Batch size
bs = 32
# Default learning rate (reassigned to 5e-3 before the first training run)
lr = 0.01

In [None]:
# Load the comma-separated dataset description into a DataFrame and preview
# its first rows.
df = pd.read_csv(csv)
df.head(5)

## Dataset overview

In [None]:
# Dimensions of the full table as (rows, columns)
df.shape

In [None]:
# Number of rows per class label in the full table
df.groupby('Label').size()

In [None]:
# Bar chart of the class distribution over the whole table (training and
# validation rows together).
sns.set(style="whitegrid")
sns_plot = sns.countplot(x="Label", data=df)
# Rotate the class names so they do not overlap each other
sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=90)
fig = sns_plot.get_figure()
# bbox_inches="tight" grows the saved canvas to include the rotated tick labels;
# without it the PNG can clip them even though the inline preview looks fine.
fig.savefig("BarGraph_Distribution_Label.png", bbox_inches="tight")

In [None]:
# Column dtypes; the `is_valid` column is compared against True/False in the following cells
df.dtypes

In [None]:
# Training subset: rows whose `is_valid` flag equals False
is_training_row = df["is_valid"].eq(False)
df_train = df[is_training_row]
df_train.shape

In [None]:
# Bar chart of the class distribution within the training subset
sns.set(style="whitegrid")
sns_plot = sns.countplot(x="Label", data=df_train)
# Rotate the class names so they do not overlap each other
sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=90)
fig = sns_plot.get_figure()
# bbox_inches="tight" grows the saved canvas to include the rotated tick labels;
# without it the PNG can clip them even though the inline preview looks fine.
fig.savefig("BarGraph_Distribution_Label_Training.png", bbox_inches="tight")

In [None]:
# Number of training rows per class label
df_train.groupby('Label').size()

In [None]:
# Validation subset: rows whose `is_valid` flag equals True
is_validation_row = df["is_valid"].eq(True)
df_val = df[is_validation_row]
df_val.shape

In [None]:
# Bar chart of the class distribution within the validation subset
sns.set(style="whitegrid")
sns_plot = sns.countplot(x="Label", data=df_val)
# Rotate the class names so they do not overlap each other
sns_plot.set_xticklabels(sns_plot.get_xticklabels(), rotation=90)
fig = sns_plot.get_figure()
# bbox_inches="tight" grows the saved canvas to include the rotated tick labels;
# without it the PNG can clip them even though the inline preview looks fine.
fig.savefig("BarGraph_Distribution_Label_Val.png", bbox_inches="tight")

In [None]:
# Number of validation rows per class label
df_val.groupby('Label').size()

In [None]:
# Per-class row counts for the training subset, the validation subset and the
# whole table, placed side by side in one frame.
df_size = df.groupby('Label').size()
df_train_size = df_train.groupby('Label').size()
df_val_size = df_val.groupby('Label').size()
df_concatsize = pd.concat(
    [df_train_size, df_val_size, df_size],
    axis=1,
    keys=['train', 'val', 'total'],
)
#df_concatsize = df_concatsize.reindex(classes_Labels_ordered)
df_concatsize

In [None]:
# Create smaller dataset (for faster experiment purposes)
#df = df.sample(frac=0.5).reset_index(drop=True)
#df.head()

In [None]:
#df.shape

## Deep Learning analysis

In [None]:
# Data augmentation
#transforms = [RandomRotate(5), RandomLighting(0.05, 0.05), RandomDihedral()]

In [None]:
#def get_data(rsz):
#    tfms = tfms_from_model(arch,sz,aug_tfms=transforms, crop_type=CropType.RANDOM, max_zoom=1.0)
#    return ImageClassifierData.from_csv(path,'data_train', csv, bs=bs, tfms=tfms, val_idxs=val_idxs, suffix='', test_name='', skip_header=True, num_workers=1)


In [None]:
# Augmentation settings handed to get_transforms: flips enabled (vertical ones
# included), rotation limit 5, zoom limit 1, lighting limit 0.05, warping
# set to 0.0, and both p_affine and p_lighting set to 1.
tfms = get_transforms(
    do_flip=True,
    flip_vert=True,
    max_rotate=5,
    max_zoom=1,
    max_lighting=0.05,
    max_warp=0.0,
    p_affine=1,
    p_lighting=1,
)

In [None]:
# Fix NumPy's global seed, then build the labelled image list step by step:
# read the items from the CSV, split them on the `is_valid` column, and take
# the labels from the dataframe.
np.random.seed(4)
image_items = ImageList.from_csv(os.getcwd(), csv, folder='../Data_TargetClass')
split_items = image_items.split_from_df(col='is_valid')
src = split_items.label_from_df()

In [None]:
# Apply the transforms at size `sz`, batch with size `bs`, then normalize with
# the ImageNet statistics.
transformed_src = src.transform(tfms, size=sz)
batched_data = transformed_src.databunch(bs=bs)
data = batched_data.normalize(imagenet_stats)

In [None]:
# Class names exposed by the data object
print(data.classes)

### Check data augmentation

In [None]:
# Preview a 3-row grid of (augmented) training images.
data.show_batch(rows=3, figsize=(12,9))
# show_batch draws on a new pyplot figure but does not return it, so take the
# handle from pyplot explicitly and save through it.
fig = plt.gcf()
fig.savefig('Screenshot_TrainingImages.png')

### Network

In [None]:
# Build the learner for architecture `arch` on `data`, reporting error rate and accuracy
learn = cnn_learner(data, arch, metrics=[error_rate, accuracy])

In [None]:
# Print the model summary produced by the learner
print(learn.summary())

In [None]:
# Display the learner's repr as the cell output
learn

In [None]:
# Run the learning-rate finder; its recorded curve is plotted in the next cell
learn.lr_find()

In [None]:
# Plot the curve recorded by the preceding lr_find() run.
fig = learn.recorder.plot(return_fig=True)
# Save through the returned handle rather than pyplot's implicit
# "current figure", so the right figure is written regardless of pyplot state.
fig.savefig("Screenshot_LearningRateFinder_Phase1.png")

In [None]:
# Learning rate used for training; replaces the default lr = 0.01 from the parameters cell
lr = 5e-3

In [None]:
# Phase 1: 7 epochs at learning rate `lr`.
# NOTE(review): learn.unfreeze() is only called later, so presumably only the
# head is trained here — confirm.
learn.fit(7, lr)

In [None]:
#learn.fit_one_cycle(15, slice(lr))

In [None]:
# Learning-rate (and momentum) schedule recorded during phase 1.
fig = learn.recorder.plot_lr(show_moms=True, return_fig=True)
# Save through the returned handle, and spell out ".png" like the other
# screenshots so the file name does not depend on matplotlib's default format.
fig.savefig("Screenshot_LearningRate_Phase1.png")

In [None]:
# Loss curves recorded during phase 1.
fig = learn.recorder.plot_losses(return_fig=True)
# Save through the returned handle, and spell out ".png" like the other
# screenshots so the file name does not depend on matplotlib's default format.
fig.savefig("Screenshot_Loss_Phase1.png")

In [None]:
# Checkpoint the model after phase 1
learn.save('fastai-v1_224_lastlayer_resnet50')

In [None]:
# Reload the phase-1 checkpoint written by the previous cell
learn.load('fastai-v1_224_lastlayer_resnet50')

In [None]:
# Unfreeze the model before the second training phase
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder on the unfrozen model
learn.lr_find()

In [None]:
# Plot the curve recorded by the preceding lr_find() run, skipping one
# point at each end of the recording.
fig = learn.recorder.plot(skip_start=1,skip_end=1,return_fig=True)
# Save through the returned handle rather than pyplot's implicit
# "current figure", so the right figure is written regardless of pyplot state.
fig.savefig("Screenshot_LearningRateFinder_Phase2.png")

In [None]:
# Earlier schedule kept for reference (not executed):
#lr=1e-4
#lrs=np.array([lr/9,lr/3,lr])
#learn.fit(lrs, 3, cycle_len=3, cycle_mult=2)
# Phase 2: 15 epochs of one-cycle training; max_lr is a slice from 1e-6 up to
# lr/10, where `lr` is whatever value the kernel currently holds.
learn.fit_one_cycle(15, max_lr=slice(1e-6, lr/10))

In [None]:
# Learning-rate (and momentum) schedule recorded during phase 2.
fig = learn.recorder.plot_lr(show_moms=True,return_fig=True)
# Save through the returned handle, and spell out ".png" like the other
# screenshots so the file name does not depend on matplotlib's default format.
fig.savefig("Screenshot_LearningRate_Phase2.png")

In [None]:
# Loss curves recorded during phase 2.
fig = learn.recorder.plot_losses(return_fig=True)
# Save through the returned handle, and spell out ".png" like the other
# screenshots so the file name does not depend on matplotlib's default format.
fig.savefig("Screenshot_Loss_Phase2.png")

In [None]:
# Checkpoint the model after phase 2
learn.save('fastai-v1_224_all_resnet50')

In [None]:
# Reload the phase-2 checkpoint written by the previous cell
learn.load('fastai-v1_224_all_resnet50')

## Data Interpretation

In [None]:
# Show 3 rows of example predictions.
learn.show_results(rows=3)
# show_results draws on a new pyplot figure but does not return it, so take the
# handle from pyplot explicitly and save through it.
fig = plt.gcf()
fig.savefig('Screenshot_ImagePredictions.png')

In [None]:
# Build the interpretation object and fetch the losses together with their
# item indices.
interp = ClassificationInterpretation.from_learner(learn)
losses, idxs = interp.top_losses()

# Sanity check shown as the cell output: the validation set, the losses and
# the indices must all have the same length.
len({len(data.valid_ds), len(losses), len(idxs)}) == 1

In [None]:
# Plot the 9 items with the highest loss.
interp.plot_top_losses(9, figsize=(15,11))
# Without return_fig the call does not hand back the figure, so take the
# handle from pyplot explicitly and save through it.
fig = plt.gcf()
fig.savefig('Screenshot_Images-TopLosses.png')

In [None]:
#doc(interp.plot_top_losses)

In [None]:
# Confusion matrix on the interpretation object's data, saved to disk.
# The statements below rely on pyplot's "current figure", so their order matters.
#plot_confusion_matrix(cm, data.classes)
interp.plot_confusion_matrix(figsize=(6,6))
# Grab the figure that was just drawn before anything else touches pyplot state
fig1 = plt.gcf()
plt.tight_layout()
# Vertical class names on the x axis
plt.xticks(rotation='vertical')
# Save before plt.show()
fig1.savefig('ConfusionMatrix_Validation_ResNet50.png')
plt.show()

In [None]:
# List the most confused class pairs, using a minimum count of 1
interp.most_confused(min_val=1)

## Model deployment

In [None]:
# Export the learner to a .pkl file for deployment.
# NOTE(review): the relative path is presumably resolved against learn.path —
# confirm that the models/ directory exists there.
learn.export('./models/TargetClass_fastai-v1_224_all_resnet50.pkl')

In [None]:
# learn = load_learner('./models/export.pkl')
#pred_class,pred_idx,outputs = learn.predict(img)

In [None]:
# Class names of the data object, displayed for reference next to the exported model
data.classes