In [None]:
#-----------------------------------------------------------------------------
#  Work for EfficientNetB3 Version2
#  author @HarshitaSharma
#  Code Collaborator : @Kottana Priyanka
#  Dated : 11 December , 2020
# Kaggle dataset used .
#-----------------------------------------------------------------------------

## Classification of OCT retinal data: an exercise using EfficientNet and Transfer Learning

Optical Coherence Tomography (OCT) is a diagnostic imaging technique that uses light to obtain high resolution images of the cross-sectional structure of the retina. The images are used by ophthalmologists to diagnose and track age-related macular degeneration (AMD), diabetes and other systemic pathologies that manifest in the eyes.

To train the OCT classification model, we will use a transfer learning workflow and leverage pretrained weights from ImageNet. The first step is feature extraction, where the base model weights are frozen. OCT retinal images are very different from the natural images of ImageNet, so the next step is to fine-tune the model by unfreezing some fraction of the upper layers of the base model and retraining the model.


## I. Setup

### Imports

In [None]:
!pip install -q efficientnet

In [None]:
import tensorflow as tf
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import glob
from itertools import repeat
from itertools import product

import efficientnet.tfkeras as efn


from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import Callback
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from PIL import Image

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from collections import Counter

from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


import warnings
# Silence every Python warning for the rest of the session. This keeps the
# output short but also hides deprecation notices raised by the libraries.
warnings.filterwarnings("ignore")

# Record the library versions used for this run.
print("The TensorFlow version is ", tf.__version__)
print("The Keras version is ", tf.keras.__version__)

In [None]:
# Data path
# NOTE(review): the directory name 'OCT2017 ' contains a trailing space before
# the slash; presumably this mirrors the Kaggle dataset layout -- confirm
# before "cleaning" it up.
TRAIN_PATH = r'../input/kermany2018/OCT2017 /train/'
VAL_PATH = r'../input/kermany2018/OCT2017 /val/'
TEST_PATH = r'../input/kermany2018/OCT2017 /test/'

# Model/path
# Directory of previously saved weights and the file names used when saving
# the feature-extraction (FE) and fine-tuning (FT) weights.
MODEL_WEIGHTS_PATH = r'../input/oct-efficientnetb3-feature-extraction/'
FE_FILE = 'oct_efficientnetb3_fe_model.h5'
FT_FILE = 'oct_efficientnetb3_ft_model.h5'


# Parameters
# The position of a class in CLASSES is used as its integer label.
CLASSES = ['CNV', 'DME', 'DRUSEN', 'NORMAL']
NUM_CLASSES = len(CLASSES)
IMG_DIMS = 300  #224
IMG_SHAPE = (IMG_DIMS, IMG_DIMS, 3)   # square, 3-channel model input

FE_EPOCHS = 7    # feature extraction epochs
FT_EPOCHS = 5    # fine tuning epochs

BATCH_SIZE = 16
# Default-configured Adam; later cells rebind OPTIMIZER before each compile.
OPTIMIZER = Adam()

# Training flags
# due to Kaggle training time limits, we set a flag to fine-tune
# for 5 (FT_1) or 5+5 (FT_2) epochs
TRAIN_FT_2 = 0 # set to 0/1 to fine tune for 5 or 5+5 epoch

## II. Data  
EDA and image preprocessing utility functions

### Load data

In [None]:
# load data, plot class distribution

def load_data(path, label, class_type):
    """Collect the image files of one class into a dataframe.

    Args:
        path: Directory prefix of the split; it is concatenated directly with
            ``class_type``, so it must already end with a path separator.
        label: Label value stored for every image found.
        class_type: Name of the class sub-directory to scan.

    Returns:
        DataFrame with columns ``images`` (file paths matched by
        ``path + class_type + '/*'``) and ``labels`` (``label`` repeated).
    """
    pattern = path + class_type + '/*'
    image_paths = glob.glob(pattern)

    # One (path, label) row per file, in the order glob returned them.
    rows = [(image_path, label) for image_path in image_paths]

    return pd.DataFrame(rows, columns = ['images', 'labels'])
        

def plot_dataset_imbalance(sample_distribution):
    """Show a bar chart of the number of samples per class.

    Args:
        sample_distribution: Sample counts, one per entry of ``CLASSES`` and
            in the same order.
    """
    bar_colors = ['blue', 'red', 'pink', 'turquoise']
    figure = plt.figure()
    # A single axes that fills the whole figure.
    axes = figure.add_axes([0,0,1,1])
    axes.bar(CLASSES, sample_distribution, color=bar_colors)
    plt.show()
        
        
        
# For every split (train/val/test) and every class, load the image list into
# a dataframe and bind it to a dynamically named module-level variable
# df_<split>_<class> (e.g. df_train_CNV). At module scope vars() is the
# module namespace, so assigning into it creates real global names that the
# later cells refer to directly.
for dataset, path in list(zip(['train', 'val', 'test'], [TRAIN_PATH, VAL_PATH, TEST_PATH])):

    for label,class_type in enumerate(CLASSES):
        df_var = "df_" + dataset + "_" + class_type
        # The label is stored as a string ('0'..'3').
        vars()[df_var] = load_data(path, str(label), class_type)
        # num_<class> is overwritten on every split; it only holds the count
        # for the split currently being processed.
        sample_size_var = "num_" + class_type
        vars()[sample_size_var] = vars()[df_var].shape[0]
        print('The size of ', df_var, ' is {} '.format(vars()[df_var].shape))

    # Plot the class counts of the split that was just loaded.
    sample_distribution = [num_CNV, num_DME, num_DRUSEN, num_NORMAL]
    plot_dataset_imbalance(sample_distribution)


### Display raw images for four classes

In [None]:
# plot randomly selected images of each class

# 4 x 3 grid: one row per class (in CLASSES order), three random training
# images per row.
fig=plt.figure(figsize=(12, 10))
cols = 3
rows = 4

# NOTE(review): the random row index is drawn from [0, 1000), which assumes
# every training class dataframe has at least 1000 rows -- confirm. No seed
# is set, so the images shown differ between runs.
for idx, rand_idx in enumerate(np.random.randint(0,1000, size=cols*rows)):
    # int(idx/cols) is the grid row, which doubles as the class index.
    class_type = CLASSES[int(idx/cols)]
    df_var = "df_train_" + class_type
    # Look up the per-class dataframe by its dynamically created name.
    img = vars()[df_var]['images'][rand_idx]
    img = plt.imread(img)
    ax = fig.add_subplot(rows, cols, idx+1)
    # Label only the first image of each row with the class name.
    if idx%cols==0:
        plt.ylabel(CLASSES[int(idx/cols)], fontsize=16)
    plt.imshow(img, cmap='gist_gray')
  

### Upsampling imbalanced data, make train/test dataframes 

From the histograms it is clear that the 'CNV' class, which is roughly 3x the size of the 'DME' class and 4x the size of the 'DRUSEN' class, dominates the training data set distribution. To rebalance the data set, two approaches are available:   
i) use Random Oversampler/Undersampler (implemented in imbalanced-learn)  
ii) SMOTE (implemented in imbalanced-learn)    
These methods are memory intensive and only feasible for very small data sets. 

We take a different approach to oversampling minority class image data: we add (concatenate) the minority class dataframes multiple times to the training dataset. We then shuffle the joint dataframe to create a new oversampled dataset. This method is easy to implement for larger image data sets.
  
Here we oversample DME and DRUSEN by 2x each, partially balancing the dataset. We do not balance the dataset completely because oversampling can introduce noise (overfitting) to the model. 
Once the dataset is partially rebalanced, we compute class weights that are applied to the loss function so that the minority classes are weighted higher.

In [None]:
def get_train_set(rebalance=0):
    """Assemble and shuffle the training dataframe from the per-class frames.

    Args:
        rebalance: ``0`` concatenates each class dataframe once. Any other
            value includes the DME and DRUSEN dataframes twice each
            (oversampling by duplication).

    Returns:
        The shuffled, concatenated training dataframe.
    """
    if rebalance == 0:
        frames = [df_train_CNV,
                  df_train_DME,
                  df_train_DRUSEN,
                  df_train_NORMAL]
    else:
        # Minority classes appear twice in the list.
        frames = [df_train_CNV,
                  df_train_DME, df_train_DME,
                  df_train_DRUSEN, df_train_DRUSEN,
                  df_train_NORMAL]

    df_train_upsample = pd.concat(frames, axis=0).reset_index(drop=True)

    df_train = shuffle(df_train_upsample)
    print('The size of df_train is {}'.format(df_train.shape))

    return df_train

In [None]:
# Build the initial, non-oversampled training dataframe (shuffled).
df_train = get_train_set(rebalance=0)
#df_train.head()

Since the validation dataset has very few images, we merge it into the test set.

In [None]:
def get_test_set():
    """Merge the test and validation frames of every class into one test set.

    Returns:
        The shuffled dataframe containing all test and validation rows.
    """
    per_class_pairs = [(df_test_CNV, df_val_CNV),
                       (df_test_DME, df_val_DME),
                       (df_test_DRUSEN, df_val_DRUSEN),
                       (df_test_NORMAL, df_val_NORMAL)]

    # Flatten to test, val, test, val, ... in class order.
    frames = []
    for test_part, val_part in per_class_pairs:
        frames.extend([test_part, val_part])

    df_test_combined = pd.concat(frames, axis=0).reset_index(drop=True)
    df_test = shuffle(df_test_combined)
    print('The size of df_test is {}'.format(df_test.shape))

    return df_test

In [None]:
# df_test = get_test_set()

In [None]:
#As its already split in train and test set, we need not split again.
def get_train_val_split_data(df_train, test_size=0.05):
    """Split a training dataframe into stratified train and validation parts.

    Args:
        df_train: Dataframe with a ``labels`` column used for stratification.
        test_size: Fraction (or count) of rows passed to ``train_test_split``
            as the validation share.

    Returns:
        Tuple ``(df_train, df_val)`` of the two resulting dataframes.
    """
    # Fixed random_state so the same split is produced on every call.
    train_part, val_part = train_test_split(df_train,
                                            test_size=test_size,
                                            random_state=2020,
                                            stratify=df_train['labels'])

    print('The size of df_train is {}'.format(train_part.shape))
    print('The size of df_val is {}'.format(val_part.shape))

    return train_part, val_part

In [None]:
# df_train, df_val = get_train_val_split_data(df_train)

### Plot train/test set class distribution

In [None]:
#train_counts = df_train['labels'].value_counts().sort_index()
#test_counts = df_test['labels'].value_counts().sort_index()

def plot_dataset_distribution(df_train, df_test):
    """Draw side-by-side pie charts of the train and test class distributions.

    Args:
        df_train: Training dataframe with a ``labels`` column whose values
            are class indices into ``CLASSES`` (as ints or numeric strings).
        df_test: Test dataframe with the same ``labels`` convention.
    """
    fig, ax = plt.subplots(1,2, figsize=(10, 6))
    colors = ['blue', 'red', 'pink', 'turquoise']

    # Explicit mapping instead of looking the frames up through vars(), which
    # only worked because the parameter names matched the generated strings.
    datasets = {'train': df_train, 'test': df_test}

    for axis, (dataset, frame) in zip(ax, datasets.items()):

        counts = frame['labels'].value_counts().sort_index()
        # Derive names and colours from the labels actually present, so a
        # split that lacks a class does not break the pie call.
        class_ids = [int(label) for label in counts.index]
        axis.pie(counts,
                 labels=[CLASSES[class_id] for class_id in class_ids],
                 autopct='%.1f %%',
                 colors=[colors[class_id % len(colors)] for class_id in class_ids])
        axis.set_title( '{} set'.format(dataset))

    fig.suptitle('Class distribution for train and test datasets',
                 y=1.1, fontsize=16)
    fig.tight_layout()

In [None]:
# plot_dataset_distribution(df_train, df_test) 

### Get class weights

In [None]:
def get_class_weights(verbose=1):
    """Compute per-class loss weights from the module-level ``df_train``.

    Each class is weighted by ``largest_class_count / class_count``, so the
    most frequent class gets weight 1.0.

    Args:
        verbose: When truthy, print the resulting dictionary.

    Returns:
        Dict mapping integer class label to its weight.
    """
    label_counts = Counter(df_train.labels)
    largest_count = float(max(label_counts.values()))

    class_weights = {}
    for class_label, num_images in label_counts.items():
        class_weights[int(class_label)] = largest_count / num_images

    if verbose:
        print('Class weights: \n', class_weights)
    return class_weights

In [None]:
# class_weights = get_class_weights()

### Find image size dimensions 

In [None]:
def find_img_dims(image_list):
    """Find the smallest and largest side length over a collection of images.

    Args:
        image_list: Iterable of image file paths readable by ``Image.open``.

    Returns:
        Tuple ``(min_size, max_size)``: the smallest short side and the
        largest long side, in pixels, over all images.

    Raises:
        ValueError: If ``image_list`` is empty.
    """
    smallest = None
    largest = None

    for image_path in image_list:
        # The context manager closes the file handle promptly; without it a
        # handle stays open per image until garbage collection.
        with Image.open(image_path) as im:
            short_side = min(im.size)
            long_side = max(im.size)

        # Running extremes avoid holding two lists as long as the dataset.
        if smallest is None or short_side < smallest:
            smallest = short_side
        if largest is None or long_side > largest:
            largest = long_side

    if smallest is None:
        raise ValueError('image_list is empty; cannot compute image dimensions')

    return smallest, largest

In [None]:
# Scan every training image and report the smallest and largest side length.
# find_img_dims opens each file, so this cell reads the whole training set.
image_list = df_train.images
image_list=list(image_list)
min_size, max_size = find_img_dims(image_list)
print('The min and max image dims are {} and {} respectively.'
      .format(min_size, max_size))

We have set the model input image size (IMG_DIMS) to 300x300 pixels which is the expected input resolution for the EfficientNet B3 model. This resolution is smaller than the smallest image in the data set.

### Image Augmentation/Data Generator

In [None]:
def get_train_data(augmentation=0):
    """Create the training batch iterator from the module-level ``df_train``.

    Args:
        augmentation: When truthy, apply random rotation, shifts, zoom and
            horizontal flips in addition to rescaling; otherwise only rescale
            pixel values by 1/255.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``.
    """
    generator_options = {'rescale': 1./255}
    if augmentation:
        generator_options.update(rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 fill_mode='nearest',
                                 zoom_range=0.3,
                                 horizontal_flip=True)

    train_datagen = ImageDataGenerator(**generator_options)

    # directory=None: the 'images' column already holds the file paths.
    return train_datagen.flow_from_dataframe(dataframe=df_train,
                                             directory=None,
                                             x_col="images",
                                             y_col="labels",
                                             class_mode="categorical",  # for multiclass
                                             target_size=(IMG_DIMS, IMG_DIMS),
                                             batch_size=BATCH_SIZE)

In [None]:
def get_valid_data():
    """Create the validation batch iterator from the module-level ``df_val``.

    Only rescaling (1/255) is applied; batches are shuffled.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``.
    """
    # NOTE(review): ``df_val`` is not assigned by any visible, uncommented
    # cell (the train/val split call is commented out) -- confirm it exists
    # before calling this.
    flow_options = dict(directory=None,
                        x_col="images",
                        y_col="labels",
                        class_mode="categorical",
                        shuffle= True,
                        target_size=(IMG_DIMS, IMG_DIMS),
                        batch_size=BATCH_SIZE)

    rescaler = ImageDataGenerator(rescale=1./255)
    return rescaler.flow_from_dataframe(dataframe=df_val, **flow_options)

In [None]:
def get_test_data():
    """Create the test batch iterator from the module-level ``df_test``.

    Only rescaling (1/255) is applied, and shuffling is disabled so batches
    follow the row order of ``df_test``.

    Returns:
        The iterator returned by ``ImageDataGenerator.flow_from_dataframe``.
    """
    flow_options = dict(directory=None,
                        x_col="images",
                        y_col="labels",
                        class_mode="categorical",
                        shuffle= False,
                        target_size=(IMG_DIMS, IMG_DIMS),
                        batch_size=BATCH_SIZE)

    rescaler = ImageDataGenerator(rescale=1./255)
    return rescaler.flow_from_dataframe(dataframe=df_test, **flow_options)

## III. Training  
Utility functions (plotting, callbacks)

### Plot training performance

In [None]:
def display_training_curves(accuracy, loss, start_epoch):
    """Plot training accuracy and loss, marking where fine-tuning started.

    Args:
        accuracy: Sequence of per-epoch training accuracy values.
        loss: Sequence of per-epoch training loss values.
        start_epoch: Epoch index at which the most recent training stage
            began. A marker is drawn at ``FE_EPOCHS`` when
            ``start_epoch >= FE_EPOCHS`` and another at
            ``FE_EPOCHS + FT_EPOCHS`` when
            ``start_epoch >= FE_EPOCHS + FT_EPOCHS``.
    """
    fig, ax = plt.subplots(2,1, figsize=(10, 8))

    # Explicit (name, data, colour) triples instead of a vars() lookup.
    curves = (('accuracy', accuracy, 'b'), ('loss', loss, 'r'))

    for axis, (lcurve, data, color) in zip(ax, curves):

        axis.plot(data, color=color)
        axis.set_title( 'Training {}'.format(lcurve))

        # axvline spans the full height of *this* axes. The previous
        # plt.ylim() call read the limits of the current (last created) axes,
        # so the accuracy panel was drawn with the loss panel's range.
        if start_epoch >= FE_EPOCHS:
            axis.axvline(FE_EPOCHS, linestyle='--', color='tab:orange',
                         label='Fine tuning 1')
        if start_epoch >= FE_EPOCHS+FT_EPOCHS:
            axis.axvline(FE_EPOCHS+FT_EPOCHS, linestyle='--', color='tab:green',
                         label='Fine Tuning 2')

        # Only draw a legend when there is something to put in it.
        handles, _ = axis.get_legend_handles_labels()
        if handles:
            axis.legend(loc='lower left')

    ax[-1].set_xlabel('epochs')
    fig.suptitle('Training curves',
                 y=1.1, fontsize=16)
    fig.tight_layout()
    

### Callbacks

In [None]:
class LRFinder(Callback):

    '''
    The Learning Rate range test: a callback for finding the optimal learning rate range.
    During training it raises the optimizer's learning rate after every batch
    and records the learning rate together with the batch logs, so that the
    loss can be plotted against the learning rate afterwards.

    # Usage
        ```
            lr_finder = LRFinder(min_lr=1e-5,
                                 max_lr=1e-2,
                                 steps_per_epoch=np.ceil(data_size/batch_size),
                                 epochs=3,
                                 beta=0.9)
            model.fit(X_train, Y_train, callbacks=[lr_finder])

            lr_finder.plot_loss()
        ```

    # Arguments
        min_lr: The lower bound of the learning rate
        max_lr: The upper bound of the learning rate
        steps_per_epoch: Number of iterations/mini-batches -- calculated as `np.ceil(data_size/batch_size)`.
                         Required (the `None` default cannot be multiplied by `epochs`).
        epochs: Number of epochs to run experiment. Usually between 2 and 4 epochs is sufficient.
                Required, for the same reason as `steps_per_epoch`.
        beta: the smoothing parameter. 0.99 ~ weighted over 100 previous values,
                                       0.9 - 10 values.

    # Acknowledgements
        https://raw.githubusercontent.com/Meena-Mani/IDC_breast_cancer/master/lrate_callback.py
        Original paper: https://arxiv.org/abs/1506.01186

    '''

    def __init__(self, min_lr=1e-5, max_lr=1e-2, steps_per_epoch=None, epochs=None, beta=0.9):
        super().__init__()

        self.min_lr = min_lr
        self.max_lr = max_lr
        # Total number of batches over which the learning rate is ramped.
        self.total_iterations = steps_per_epoch * epochs
        self.iteration = 0
        # Maps 'lr', 'iterations' and every batch-log key to a per-batch list.
        self.history = {}
        self.beta = beta

    def clr(self):
        '''Calculate the learning rate for the current iteration.'''
        # use log fn to sample very small values
        # NOTE: x runs from 0 to log(2) ~= 0.69 as iteration goes from 0 to
        # total_iterations, so the rate ends at
        # min_lr + 0.69 * (max_lr - min_lr) rather than at max_lr itself.
        x = np.log(1 + self.iteration / self.total_iterations)
        return self.min_lr + (self.max_lr-self.min_lr) * x

    def on_train_begin(self, logs=None):
        '''Initialize the learning rate to the minimum value at the start of training.'''
        tf.keras.backend.set_value(self.model.optimizer.lr, self.min_lr)

    def on_batch_end(self, epoch, logs=None):
        '''For every iteration, record batch statistics and update the learning rate.

        The first positional argument is named `epoch` but this hook runs once
        per batch; the value is not used.
        '''
        logs = logs or {}
        self.iteration += 1

        # Record the rate that was in effect for the batch that just finished.
        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.iteration)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        # Set the rate for the next batch.
        tf.keras.backend.set_value(self.model.optimizer.lr, self.clr())


    def smooth_fn(self, y):
        '''Return the bias-corrected exponentially weighted average of `y`.

        The running average starts from zero and each element is divided by
        `1 - beta**(i + 1)`, the total weight accumulated so far, so early
        values are not pulled towards zero and a constant input is returned
        unchanged. (Dividing every element by `1 - beta**n`, with `n` the
        total length, only rescales the curve and inflates short histories.)
        '''
        values = np.asarray(y, dtype=float)
        n = len(values)
        beta_c = 1 - self.beta
        ewa_corrected = np.zeros(n)
        running = 0.0
        for i in range(n):
            running = self.beta*running + beta_c*values[i]
            ewa_corrected[i] = running / (1 - self.beta**(i + 1))
        return ewa_corrected

    def plot_lr(self):
        '''Helper function to quickly inspect the learning rate schedule.'''
        plt.figure(figsize=(10,6))
        plt.plot(self.history['iterations'], self.history['lr'])
        plt.yscale('log')
        plt.xlabel('Iteration')
        plt.ylabel('LR')
        plt.title("Learning rate")

    def plot_loss(self):
        '''Plot the smoothed loss versus the learning rate.'''
        plt.figure(figsize=(10,6))
        smoothed_loss = self.smooth_fn(self.history['loss'])
        # Plot every 10th point, skipping the very first batch.
        plt.plot(self.history['lr'][1::10], smoothed_loss[1::10])
        plt.xscale('log')
        plt.xlabel('LR (log scale)')
        plt.ylabel('Loss')
        plt.title("Loss vs Learning Rate")


In [None]:
# Callbacks


def lrfn(epoch, lr):
    """Learning-rate schedule used with ``LearningRateScheduler``.

    Args:
        epoch: Epoch index supplied by the scheduler callback.
            NOTE(review): Keras epoch indices presumably start at 0, in which
            case the "unchanged" branch applies to the second epoch -- confirm
            that this is intended.
        lr: Current learning rate.

    Returns:
        ``lr`` unchanged when ``epoch == 1``; ``lr ** 1.1`` for any other
        ``epoch <= 3``; ``lr / 2`` otherwise.
    """
    if epoch == 1:
        return lr
    if epoch <= 3:
        # For lr < 1, raising to the power 1.1 makes the rate smaller.
        return lr**1.1
    return lr / 2


# Per-epoch learning-rate schedule driven by lrfn.
lr_scheduler = LearningRateScheduler(lrfn,verbose=1)


# Keep only the best weights seen so far. The training cells in this notebook
# call model.fit() without validation data, so 'val_loss' (the default
# monitor) never appears in the logs and nothing would ever be saved; the
# training loss is monitored instead.
checkpoint = ModelCheckpoint(filepath='/kaggle/working/best_weights.hdf5',
                             monitor='loss', mode='min',
                             save_best_only=True, save_weights_only=True)
# A loss should be minimised: with mode='max' the rate would be reduced when
# the validation loss stopped *increasing*.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=2, mode='min')
# Both lr_reduce and early_stop need validation data to be passed to fit().
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=1, mode='min')


### Transfer Learning step 1: Feature Extraction 

EfficientNet considerations:  
We use the EfficientNet B3 model since it is a mobile-sized architecture with 11M trainable parameters and a higher accuracy than other models in its size class.


Some strategies used for feature extraction:  
- small batch size for faster convergence to a good solution (batch size = 16)
- larger learning rate (we use the default Adam lr: lr = 0.001)
- no data augmentation
- do not rebalance the data with oversampling; use class weights 

In [None]:
# Feature-extraction data: no oversampling and no augmentation.
df_train = get_train_set(rebalance=0)

# get_train_data reads the module-level df_train assigned just above.
train_data = get_train_data(augmentation=0)

# Class weights are computed from the same df_train.
class_weights = get_class_weights()

In [None]:
# Combined test + validation dataframe, shuffled.
df_test = get_test_set()

In [None]:
# Pie charts of the class shares in the current train and test dataframes.
plot_dataset_distribution(df_train, df_test)

### Model

In [None]:
# Create the base model from the pre-trained model EfficientNet B3

# include_top=False drops the original classifier head, so only the
# convolutional feature extractor with ImageNet weights is loaded.
base_model = efn.EfficientNetB3(input_shape=IMG_SHAPE,
                                include_top=False,
                                weights='imagenet',
                               )

In [None]:
# Freeze the whole base model for the feature-extraction stage.
base_model.trainable = False
#base_model.summary()

In [None]:
# build model

# Classifier: base model -> global average pooling -> softmax over the
# NUM_CLASSES classes.
model = tf.keras.Sequential([
  base_model,
  layers.GlobalAveragePooling2D(),
  layers.Dense(NUM_CLASSES, activation='softmax'),
])


# Print the layer/parameter summary and render the model graph.
model.summary()
plot_model(model)

#### Find learning rate

In [None]:
# function to find/plot the learning rate using the LRFinder callback

def find_lr(MIN_LR=1e-5, MAX_LR=1e-2, STEPS=None,
            EPOCHS=3, CLASS_WEIGHTS={0: 1.0, 1: 3.3, 2: 4.3, 3: 1.4},
            beta = 0.99):
    """Run a learning-rate range test on the module-level ``model``.

    Trains ``model`` on the module-level ``train_data`` with an ``LRFinder``
    callback, then plots loss versus learning rate and the learning-rate
    schedule. The model's weights are updated by this training run.

    Args:
        MIN_LR: Lower bound of the learning-rate sweep.
        MAX_LR: Upper bound of the learning-rate sweep.
        STEPS: Batches per epoch. ``None`` (default) means
            ``train_data.samples // BATCH_SIZE``, evaluated when the function
            is called rather than when it is defined.
        EPOCHS: Number of epochs to train for.
        CLASS_WEIGHTS: Class-weight dict passed to ``model.fit`` (not
            modified here).
        beta: Smoothing parameter passed to ``LRFinder``.
    """
    if STEPS is None:
        STEPS = train_data.samples // BATCH_SIZE

    lr_finder = LRFinder(min_lr=MIN_LR,
                         max_lr=MAX_LR,
                         steps_per_epoch=STEPS,
                         epochs=EPOCHS,
                         beta = beta)

    # Use the same STEPS/EPOCHS the callback was configured with; otherwise
    # the learning-rate ramp and the actual training length disagree.
    history = model.fit(
        train_data,
        steps_per_epoch=STEPS,
        epochs=EPOCHS,
        #workers=3,
        callbacks=[lr_finder],
        class_weight=CLASS_WEIGHTS)

    lr_finder.plot_loss()
    lr_finder.plot_lr()

In [None]:
# Compile with a default-configured Adam, then run the learning-rate range
# test using the class weights computed for the current training set.
OPTIMIZER = Adam()
model.compile(optimizer=OPTIMIZER,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# find_lr calls model.fit, so the model weights change as a side effect.
find_lr(CLASS_WEIGHTS=class_weights)

#### Feature Extraction

In [None]:
# Recompile with a new Adam instance (lr=0.001) for feature extraction.
OPTIMIZER = Adam(lr=0.001)
model.compile(optimizer=OPTIMIZER,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Feature-extraction training for FE_EPOCHS epochs with class weights.
# NOTE(review): validation data is commented out, so no 'val_*' metrics are
# produced during this fit -- confirm the callbacks used here do not depend
# on them.
history_FE = model.fit(
           train_data,
           steps_per_epoch= train_data.samples // BATCH_SIZE,
           epochs=FE_EPOCHS,
           class_weight=class_weights,
           #workers=4,
           #validation_data=valid_data,
           #validation_steps=valid_data.samples // BATCH_SIZE,
           callbacks=[checkpoint])

# Persist the weights reached at the end of feature extraction.
model.save_weights(FE_FILE)

In [None]:
# plot training curves
    
# These names refer to the lists stored inside history_FE.history (no copy
# is made).
accuracy = history_FE.history['accuracy']
loss = history_FE.history['loss']
# start_epoch=0: no fine-tuning markers are drawn.
display_training_curves(accuracy, loss, start_epoch=0)

### Transfer Learning step 2: Fine-tuning

This is the second stage of transfer learning. We unfreeze some fraction of the layers and fit the model using a smaller learning rate (Adam lr = 0.0001).

We address the data imbalance by:

* rebalancing the training dataset (oversampling with copies of DME and DRUSEN)
* augmenting the data
* using class weights

In [None]:
# Fine-tuning data: DME and DRUSEN duplicated (rebalance=1), with
# augmentation enabled.
df_train = get_train_set(rebalance=1)
train_data = get_train_data(augmentation=1)

# Recompute the class weights for the rebalanced df_train.
class_weights = get_class_weights()

plot_dataset_distribution(df_train, df_test)

The training dataset is now more balanced (but not completely equal).

In [None]:
# Unfreeze the base model for fine-tuning. As written, every layer except the
# BatchNormalization layers is made trainable; the earlier variant that only
# unfroze layers from 'block7a_se_excite' onwards is kept below, commented out.

print("Number of layers in the base model: ", len(base_model.layers))
# plot_model(base_model)


base_model.trainable = True

#set_trainable = False
for layer in base_model.layers:
       #if layer.name == 'block7a_se_excite':
      #  set_trainable = True
    #if set_trainable:
    # BatchNormalization layers stay frozen; all other layers are trainable.
    if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True
    else:
        layer.trainable = False

model.summary()

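After unfreezing **block7a_se_excite** and its successive layers (this is roughly the top 10% of the layers), the number of trainable parameters is now 3.38M. Note: in the code cell above, the `block7a_se_excite` condition is commented out and every non-BatchNormalization layer of the base model is unfrozen instead, so the trainable-parameter count printed by `model.summary()` may differ from this figure.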

#### Train for the first 5 epochs (FT_1)

In [None]:
# Recompile so the changed `trainable` flags take effect, using a new Adam
# instance with the smaller fine-tuning rate (lr=1e-4).
OPTIMIZER = Adam(lr=1e-4)
model.compile(optimizer=OPTIMIZER,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# First fine-tuning stage: FT_EPOCHS epochs on the rebalanced, augmented
# data, with the lrfn schedule adjusting the learning rate every epoch.
history_FT_1 = model.fit(
            train_data,
            steps_per_epoch = train_data.samples // BATCH_SIZE,
            epochs = FT_EPOCHS,
            class_weight = class_weights,
            #workers = 4,
            #validation_data=valid_data,
            #validation_steps=valid_data.samples // BATCH_SIZE,
            callbacks=[checkpoint, lr_scheduler])

# Persist the weights reached at the end of this stage.
model.save_weights(FT_FILE)

In [None]:
# plot FE and FT learning curves 

# Build fresh lists (FE epochs followed by FT_1 epochs) instead of extending
# in place: `accuracy`/`loss` may be the very lists stored in
# history_FE.history, and `+=` would both mutate that history and append the
# FT_1 values again every time this cell is re-run. Slicing to FE_EPOCHS
# keeps the cell correct even if those lists were already extended.
accuracy = history_FE.history['accuracy'][:FE_EPOCHS] + history_FT_1.history['accuracy']
loss = history_FE.history['loss'][:FE_EPOCHS] + history_FT_1.history['loss']
display_training_curves(accuracy, loss, start_epoch=FE_EPOCHS)

#### Train for another 5 epochs (FT_2)

In [None]:
# Optional second fine-tuning stage, enabled by the TRAIN_FT_2 flag.
if TRAIN_FT_2:

    # Recompile with a new Adam instance at lr=1e-4.
    OPTIMIZER = Adam(lr=1e-4)
    model.compile(optimizer=OPTIMIZER,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

In [None]:
# Second fine-tuning stage: another FT_EPOCHS epochs with the same data,
# class weights and callbacks as the first stage.
if TRAIN_FT_2:

    history_FT_2 = model.fit(
                train_data,
                steps_per_epoch = train_data.samples // BATCH_SIZE,
                epochs = FT_EPOCHS,
                class_weight = class_weights,
                #workers = 4,
                #validation_data=valid_data,
                #validation_steps=valid_data.samples // BATCH_SIZE,
                callbacks=[checkpoint, lr_scheduler])

In [None]:
# plot learning curves for FE, FT_1, FT_2

if TRAIN_FT_2:

    # Keep only the FE + FT_1 part of the curves before appending FT_2, and
    # build new lists rather than using `+=`, so re-running this cell does
    # not append the FT_2 values a second time or mutate a stored history.
    accuracy = accuracy[:FE_EPOCHS+FT_EPOCHS] + history_FT_2.history['accuracy']
    loss = loss[:FE_EPOCHS+FT_EPOCHS] + history_FT_2.history['loss']
    display_training_curves(accuracy, loss, start_epoch=FE_EPOCHS+FT_EPOCHS)

## IV. Test Performance

In [None]:
# for test results only
# model.load_weights(MODEL_WEIGHTS_PATH+FT_FILE)

In [None]:
def plot_confusion_matrix(cm,
                          classes,
                          title='Confusion matrix',
                          cmap=plt.cm.Purples):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        cm: 2-D array of counts, indexed ``cm[true, predicted]``.
        classes: Class names used as tick labels on both axes.
        title: Figure title.
        cmap: Matplotlib colormap for the heat map.
    """
    # Overall accuracy is the diagonal share of all counts.
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total
    misclass = 1 - accuracy

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)

    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45, fontsize=15)
    plt.yticks(positions, classes, fontsize=15)

    x_label = 'Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass)
    plt.xlabel(x_label, fontsize=15)
    plt.ylabel('True label', fontsize=15)
    plt.title(title, fontsize=22);
    plt.colorbar()

    # White text on dark cells, black text on light cells.
    half_max = cm.max() / 2.0
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            text_color = "white" if cm[row, col] > half_max else "black"
            plt.text(col, row, "{:,}".format(cm[row, col]),
                     horizontalalignment="center",
                     color=text_color)
   

In [None]:
# print the model used for the test results 

# State which fine-tuning variant the results below belong to. The epoch
# counts in the messages are literal text, not derived from FT_EPOCHS.
if TRAIN_FT_2:
    print("Test results for the fine tuning model trained for 5+5 epochs")
else:
    print("Test results for the fine tuning model trained for 5 epochs")


In [None]:
#df_test = get_test_set()
# Unshuffled, rescale-only batch iterator over the module-level df_test.
test_data = get_test_data()

In [None]:
# Predict class probabilities for every test batch and take the arg-max
# class index per image.
y_prob = model.predict(test_data, steps=len(test_data), verbose=1)
y_pred = np.argmax(y_prob,axis=1)
# True labels are read from df_test in row order; this lines up with y_pred
# only because get_test_data builds the iterator with shuffle=False.
# NOTE(review): presumably every path in df_test is a valid image; if the
# iterator skipped any row the two arrays would differ in length -- confirm.
y_true = df_test['labels'].astype('int64').to_numpy()

In [None]:
# print results

# Per-class precision/recall/F1 (rows are the integer class labels) followed
# by the overall accuracy.
print(classification_report(y_true, y_pred))
print('The accuracy is {}'.format(accuracy_score(y_true, y_pred)))

In [None]:
# confusion matrix
    
# NOTE: this rebinds the module-level name `cm`, which the import cell bound
# to matplotlib.cm; after this line `cm` is the confusion-matrix array.
cm = confusion_matrix(y_true,y_pred)
classes = np.array(CLASSES)
title = 'Confusion matrix of results'
plot_confusion_matrix(cm, classes, title)