Run three experiments: one with no class-imbalance measures, one with resampling, and one with class_weights. Measure the training history of each run and compute F1 metrics.

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import numpy as np
import datetime
import time
import os
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Some stuff to make utils-function work
import sys
sys.path.append('../utils')
from pipeline import create_dataset, split_and_create_dataset, prepare_for_training
from create_model import create_model, create_callbacks, get_class_weights
from utils import write_to_file, unpipe
%load_ext autoreload
%autoreload 2

# Jupyter-specific
%matplotlib inline

# Timestamp captured once when this cell runs; it prefixes every log directory
# built further down, so all runs of one session end up under the same folder.
project_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

In [2]:
# Root folder of the labeled dataset.
# NOTE(review): absolute, machine-specific path - must be adjusted to run elsewhere.
data_dir = pathlib.Path('/home/henriklg/master-thesis/data/hyper-kvasir/labeled_ttv/')

# Log directory layout: ./logs/<project_time>_<experiment>/<dir_name>.
# The "Weighted" and "Resample" cells later overwrite conf["log_dir"] with
# their own sub-directory ("weighted" / "resampled").
dir_name = "baseline"
experiment = "resample"
log_dir = "./logs/{}_{}/{}".format(project_time, experiment, dir_name)

# Single configuration dict passed to create_dataset, create_model,
# create_callbacks, get_class_weights and write_to_file.
# Later cells also read conf["ds_sizes"], conf["steps"], conf["num_classes"] and
# conf["class_names"], none of which are set here - presumably they are added
# by create_dataset; TODO confirm.
conf = {
    # Dataset
    "data_dir": data_dir,
    "ds_info": 'hypkva',
    "augment": ["xcrop","flip","brightness","saturation","contrast","rotate"],
    "aug_mult": 0.2,
    # Both imbalance switches start off (baseline run); the "Weighted" cell sets
    # class_weight to True and the "Resample" cell sets resample to True.
    "resample": False,
    "class_weight": False,
    "shuffle_buffer_size": 2000,        # 0=no shuffling
    "seed": 2511,
    "neg_class": None,                 # select neg class for binary ds (normal class)
    "outcast": None,                   # list of folders to drop - currently only works for 1 item
    # Model
    "model": 'EfficientNetB0',
    "weights": None,                   # which weights to initialize the model with
    "dropout": 0.1,
    "num_epochs": 15,
    "batch_size": 64,
    "img_shape": (128, 128, 3),
    "learning_rate": 0.001,
    "optimizer": 'Adam',
    "final_activation": 'softmax',     # sigmoid for binary ds
    # Callbacks
    "tensorboard": False,
    "learning_schedule": False,
    # The pairs in the trailing comment are presumably <image size>:<decay rate>
    # or <batch size>:<decay rate> - TODO confirm which.
    "decay_rate": 0,                   # 128:0.25   64:1.0   32:4.0   16:16   8:64
    "checkpoint": False,
    "early_stopp": False,
    "early_stopp_patience": 7,
    # Misc
    "verbosity": 0,
    "keep_threshold": 0.0,
    "log_dir": log_dir,
    "cache_dir": "./cache"
    }

In [3]:
# Accumulators shared by all experiments in this notebook:
#   history_dict  - training history of each run, keyed by run name
#   evaluate_list - last value returned by model.evaluate for each run
#                   (appended inside evaluate_model)
history_dict = dict()
evaluate_list = list()

# Build the dataset from the configuration; later cells index the result with
# "train", "test" and "val".
ds = create_dataset(conf)

Category                    : train | test  | val   | total | % of total 
------------------------------------------------------------------------
barretts-short-segment      :    37 |     8 |     8 |    53 |  0.50%
retroflex-stomach           :   534 |   115 |   115 |   764 |  7.17%
ulcerative-colitis-0-1      :    24 |     5 |     6 |    35 |  0.33%
ulcerative-colitis-grade-3  :    93 |    20 |    20 |   133 |  1.25%
esophagitis-b-d             :   182 |    39 |    39 |   260 |  2.44%
dyed-resection-margins      :   692 |   148 |   149 |   989 |  9.28%
hemorrhoids                 :     4 |     1 |     1 |     6 |  0.06%
normal-z-line               :   652 |   140 |   140 |   932 |  8.74%
esophagitis-a               :   282 |    60 |    61 |   403 |  3.78%
ulcerative-colitis-1-2      :     7 |     2 |     2 |    11 |  0.10%
barretts                    :    28 |     6 |     7 |    41 |  0.38%
bbps-2-3                    :   803 |   172 |   173 |  1148 | 10.77%
ileum                    

In [4]:
from model_evaluation import get_classification_report
from model_evaluation import get_metrics, get_confusion_matrix
from model_evaluation import show_dataset_predictions
from model_evaluation import plot_confusion_matrix, plot_lr_and_accuracy

# Materialise the "val" split once; evaluate_model reuses eval_images and
# true_labels for the predictions of every run.
# Each element is indexed as (image, label). The pairs are read directly rather
# than via np.array(list(...)): that call would have to build a ragged object
# array (image array next to a scalar label), which recent NumPy releases
# reject with a ValueError.
eval_ds = list(unpipe(ds["val"], conf["ds_sizes"]["val"]).as_numpy_iterator())
true_labels = [sample[1] for sample in eval_ds]
eval_images = np.stack([sample[0] for sample in eval_ds], axis=0)

def evaluate_model(model, history, ds, conf):
    """Persist the artefacts of one training run and score the model on the "val" split.

    Besides its arguments the function relies on notebook globals:
    ``history_dict`` (pickled as a whole), ``evaluate_list`` (appended to) and
    ``eval_images`` / ``true_labels`` (built from the "val" split in the cell
    above this definition).

    Args:
        model: trained model; ``evaluate`` and ``predict`` are called on it.
        history: object returned by ``model.fit``; its ``.history`` is saved.
        ds: mapping of dataset splits; only ``ds["val"]`` is read here.
        conf: configuration dict; ``log_dir``, ``steps``, ``num_classes`` and
            ``class_names`` are read.

    Returns:
        None. The classification report is printed; the pickle and the
        confusion-matrix plot go to ``conf["log_dir"]`` (the ``write_to_file``
        outputs presumably do too - confirm in utils).
    """
    # The baseline run never creates its log directory explicitly, so make sure
    # it exists before the pickle below is opened for writing.
    pathlib.Path(conf["log_dir"]).mkdir(parents=True, exist_ok=True)

    # Save the metrics from training
    write_to_file(history.history, conf, "history")
    write_to_file(conf, conf, "conf")
    with open(conf["log_dir"]+"/history_dict.pkl", 'wb') as f:
        pickle.dump(history_dict, f)

    # Evaluate on the "val" split (model.fit in this notebook only sees the
    # "train" and "test" splits).
    model_evaluation = model.evaluate(ds["val"], verbose=2, steps=conf["steps"]["val"])
    write_to_file(model_evaluation, conf, "evaluate_val")
    # Keep the last reported value - presumably the accuracy metric; confirm
    # against the metrics compiled in create_model.
    evaluate_list.append(model_evaluation[-1])

    # Predicted class = index of the highest score in each prediction row
    predictions = model.predict(eval_images, verbose=1)
    pred_labels = list(np.argmax(predictions, axis=-1))

    # Classification report
    report = get_classification_report(
            true_labels,
            pred_labels,
            range(conf["num_classes"]),
            target_names=conf["class_names"]
    )
    print (report)
    write_to_file(report, conf, "classification_report")

    # Confusion matrix (saved via plot_confusion_matrix, not displayed inline)
    cm = get_confusion_matrix(true_labels, pred_labels)
    plot_confusion_matrix(cm, conf["log_dir"], conf["class_names"], figsize=(12,10), show=False)

## Baseline

In [5]:
# Baseline experiment: no resampling and no class weights.
model = create_model(conf)
callbacks = create_callbacks(conf)

# NOTE(review): the "test" split is used for per-epoch validation here, while
# evaluate_model scores on the "val" split - confirm this naming is intended.
fit_options = dict(
    steps_per_epoch=conf["steps"]["train"],
    epochs=conf["num_epochs"],
    validation_data=ds["test"],
    validation_steps=conf["steps"]["test"],
    validation_freq=1,
    callbacks=callbacks,
    class_weight=None,
    verbose=1,
)
history = model.fit(ds["train"], **fit_options)

# Record the curves under the run name, then persist and evaluate the run
history_dict["baseline"] = history.history
evaluate_model(model, history, ds, conf)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b0 (Model)      (None, 4, 4, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               655872    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 23)                11799     
Total params: 4,717,235
Trainable params: 4,675,219
Non-trainable params: 42,016
_________________________________________

## Weighted

In [None]:
# Class-weight experiment: same dataset as the baseline, but the loss is
# weighted per class. Results go to their own "weighted" log directory.
weighted_log_dir = "./logs/{}_{}/{}".format(project_time, experiment, "weighted")
conf["log_dir"] = weighted_log_dir
pathlib.Path(weighted_log_dir).mkdir(parents=True, exist_ok=True)
conf["class_weight"] = True

model = create_model(conf)
callbacks = create_callbacks(conf)
# Weights are derived from the training split by the project helper
class_weights = get_class_weights(ds["train"], conf)

fit_options = dict(
    steps_per_epoch=conf["steps"]["train"],
    epochs=conf["num_epochs"],
    validation_data=ds["test"],
    validation_steps=conf["steps"]["test"],
    validation_freq=1,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)
history = model.fit(ds["train"], **fit_options)

# Record the curves under the run name, then persist and evaluate the run
history_dict["class_weight"] = history.history
evaluate_model(model, history, ds, conf)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnet-b0 (Model)      (None, 4, 4, 1280)        4049564   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280)              0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               655872    
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 23)                11799     
Total params: 4,717,235
Trainable params: 4,675,219
Non-trainable params: 42,016
_______________________________________

## Resample

In [None]:
# Empty ./cache (the same location configured as conf["cache_dir"]) before the
# dataset is re-created with resampling in the next cell - presumably so cached
# pipeline data from the earlier runs is not reused; TODO confirm how
# create_dataset uses the cache directory.
!rm -rf ./cache/*

In [None]:
# Resampling experiment: the dataset is rebuilt with resampling switched on and
# class weights switched off. Results go to their own "resampled" log directory.
resampled_log_dir = "./logs/{}_{}/{}".format(project_time, experiment, "resampled")
conf["log_dir"] = resampled_log_dir
pathlib.Path(resampled_log_dir).mkdir(parents=True, exist_ok=True)

conf["resample"] = True
conf["class_weight"] = False
# NOTE(review): evaluate_model predicts on the eval_images / true_labels globals
# that were built from the dataset created earlier, not from this re-created
# ds - confirm the "val" split is unaffected by resampling.
ds = create_dataset(conf)

model = create_model(conf)
callbacks = create_callbacks(conf)

fit_options = dict(
    steps_per_epoch=conf["steps"]["train"],
    epochs=conf["num_epochs"],
    validation_data=ds["test"],
    validation_steps=conf["steps"]["test"],
    validation_freq=1,
    callbacks=callbacks,
    class_weight=None,
    verbose=1,
)
history = model.fit(ds["train"], **fit_options)

# Record the curves under the run name, then persist and evaluate the run
history_dict["resample"] = history.history
evaluate_model(model, history, ds, conf)

## Get previous results

# Plotting

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme, then override the matplotlib font sizes used
# by the comparison figures below.
sns.set()

SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 14, 16

# (rc group, overrides) pairs, applied in order
font_overrides = [
    ('font',   {'size': SMALL_SIZE}),        # default text sizes
    ('axes',   {'titlesize': BIGGER_SIZE}),  # axes title
    ('axes',   {'labelsize': MEDIUM_SIZE}),  # x and y labels
    ('xtick',  {'labelsize': SMALL_SIZE}),   # x tick labels
    ('ytick',  {'labelsize': SMALL_SIZE}),   # y tick labels
    ('legend', {'fontsize': MEDIUM_SIZE}),   # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),  # figure title
]
for rc_group, rc_options in font_overrides:
    plt.rc(rc_group, **rc_options)

In [None]:
# Validation accuracy and loss of every run stored in history_dict, side by side.

# Still defined here because the training-curve cell below reads both names.
x = range(conf["num_epochs"])
legends = list(history_dict.keys())

# savefig does not create missing directories
pathlib.Path('figures').mkdir(parents=True, exist_ok=True)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# Every curve gets its own x-range and its own label: a history whose length
# differs from conf["num_epochs"] still plots, and legend entries cannot drift
# out of step with the lines. The loop variable is not called `history`, so the
# object returned by the last model.fit is not overwritten.
for run_name, run_history in history_dict.items():
    val_acc = run_history['val_sparse_categorical_accuracy']
    val_loss = run_history['val_loss']
    ax_acc.plot(range(len(val_acc)), val_acc, label=run_name)
    ax_loss.plot(range(len(val_loss)), val_loss, label=run_name)

# Subplot 1
ax_acc.legend(loc='lower right')
ax_acc.set_ylim([0, 1])
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Validation Accuracy')

# Subplot 2
ax_loss.legend(loc='upper right')
ax_loss.set_ylim([0.0, 4])
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Validation Loss')

fig.tight_layout()
fig.savefig('figures/resample_vs_weighted_val.pdf', format='pdf')
plt.show()

In [None]:
# Training accuracy and loss of every run stored in history_dict, side by side.

# savefig does not create missing directories
pathlib.Path('figures').mkdir(parents=True, exist_ok=True)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# Each curve carries its own x-range and label, so this cell no longer depends
# on `x` / `legends` from the previous cell and tolerates histories whose
# length differs from conf["num_epochs"]. The loop variable is not called
# `history`, so the object returned by the last model.fit is not overwritten.
for run_name, run_history in history_dict.items():
    train_acc = run_history['sparse_categorical_accuracy']
    train_loss = run_history['loss']
    ax_acc.plot(range(len(train_acc)), train_acc, label=run_name)
    ax_loss.plot(range(len(train_loss)), train_loss, label=run_name)

# Subplot 1
ax_acc.legend(loc='lower right')
ax_acc.set_ylim([0, 1])
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Training Accuracy')

# Subplot 2
ax_loss.legend(loc='upper right')
ax_loss.set_ylim([0.0, 4])
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Training Loss')

fig.tight_layout()
fig.savefig('figures/resample_vs_weighted_train.pdf', format='pdf')
plt.show()