In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import numpy as np
import datetime
import time
import os
import pathlib
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Some stuff to make utils-function work
import sys
sys.path.append('../utils')
from pipeline import create_dataset, split_and_create_dataset, prepare_for_training
from create_model import create_model, create_callbacks, get_class_weights
from utils import write_to_file, unpipe
%load_ext autoreload
%autoreload 2

# Jupyter-specific
%matplotlib inline

# Timestamp (YYYYMMDD-HHMMSS) taken once when this cell runs; the log-directory
# names built later in the notebook embed it.
project_time = "{:%Y%m%d-%H%M%S}".format(datetime.datetime.now())

In [None]:
# Root folder of the Hyper-Kvasir data. It defaults to the original location, so
# behaviour is unchanged; set the HYPER_KVASIR_ROOT environment variable to run the
# notebook on another machine without editing this cell.
data_root = pathlib.Path(
    os.environ.get("HYPER_KVASIR_ROOT") or "/home/henriklg/master-thesis/data/hyper-kvasir"
)
data_dir = data_root / "labeled_ttv"
unlab_dir = data_root / "unlabeled_ttv"

# Default log location. The training loop further down overwrites conf["log_dir"]
# (and conf["weights"]) once per weight setting.
dir_name = "none"
experiment = "weights"
log_dir = "./logs/{}_{}/{}".format(project_time, experiment, dir_name)

# Single configuration dictionary handed to the project helpers
# (create_dataset, create_model, create_callbacks, write_to_file).
conf = {
    # Dataset
    "data_dir": data_dir,
    "unlab_dir": unlab_dir,
    "ds_info": 'hypkva',
    "augment": ["crop","flip","brightness","saturation","contrast","rotate"],
    "aug_mult": 0.5,
    "resample": True,
    "class_weight": False,
    "shuffle_buffer_size": 2000,        # 0=no shuffling
    "seed": 2511,
    "neg_class": None,                 # select neg class for binary ds (normal class)
    "outcast": None,                   # list of folders to drop - currently only works for 1 item
    # Model
    "model": 'EfficientNetB0',
    "weights": None,                   # which weights to initialize the model with
    "dropout": 0.2,
    "num_epochs": 20,
    "batch_size": 16,
    "img_shape": (128, 128, 3),
    "learning_rate": 0.001,
    "optimizer": 'Adam',
    "final_activation": 'softmax',     # sigmoid for binary ds
    # Callbacks
    "tensorboard": False,
    "learning_schedule": False,
    "decay_rate": 0,                   # 128:0.25   64:1.0   32:4.0   16:16   8:64
    "checkpoint": False,
    "early_stopp": False,
    "early_stopp_patience": 7,
    # Misc
    "verbosity": 0,
    "keep_threshold": 0.0,
    "log_dir": log_dir,
    "cache_dir": "./cache"
    }

In [None]:
# Build the dataset dictionary from the configuration. Later cells index it by
# split name ("train", "test", "val") and read conf["ds_sizes"], conf["steps"],
# conf["num_classes"] and conf["class_names"], none of which are set in the config
# cell -- presumably create_dataset() fills them in; verify in pipeline.py.
ds = create_dataset(conf)

# Initial-weight settings to compare, and the Keras history dict collected for
# each of them by the training loop (appended in the same order).
weights_list = [None, "imagenet", "noisy-student"]
history_list = list()

In [None]:
from model_evaluation import get_classification_report
from model_evaluation import get_metrics, get_confusion_matrix
from model_evaluation import show_dataset_predictions
from model_evaluation import plot_confusion_matrix, plot_lr_and_accuracy

# Create true_labels and eval_images for later evaluations.
# The "val" split is materialised once so every model is scored on the same samples.
# NOTE(review): unpipe() is a project helper; this assumes it yields individual
# (image, label, ...) samples rather than batches -- confirm in utils.py.
eval_samples = list(unpipe(ds["val"], conf["ds_sizes"]["val"]).as_numpy_iterator())

# Fill the (N, 2) object array explicitly. Calling np.array() directly on the
# (image, label) tuples asks NumPy to build a ragged array, which NumPy >= 1.24
# rejects with a ValueError; explicit element assignment works on every version.
eval_ds = np.empty((len(eval_samples), 2), dtype=object)
for row, sample in enumerate(eval_samples):
    eval_ds[row, 0] = sample[0]   # image
    eval_ds[row, 1] = sample[1]   # label

true_labels = list(eval_ds[:, 1])
eval_images = np.stack(eval_ds[:, 0], axis=0)

def evaluate_model(model, history, ds, conf):
    """Persist training artefacts and score a trained model on the "val" split.

    Besides its arguments, this function reads three notebook-level names:
    ``history_list`` (pickled as-is), ``true_labels`` and ``eval_images``
    (the materialised validation samples).

    Args:
        model: Trained model exposing ``evaluate`` and ``predict``.
        history: Object returned by ``model.fit``; its ``.history`` is saved.
        ds: Dataset dictionary; only ``ds["val"]`` is used here.
        conf: Configuration dictionary. Reads ``log_dir``, ``steps["val"]``,
            ``num_classes`` and ``class_names``.

    Returns:
        None. Results are written via ``write_to_file``, a pickle file under
        ``conf["log_dir"]``, the printed classification report and the
        confusion-matrix plot.
    """
    # Save the metrics from training
    write_to_file(history.history, conf, "history")
    write_to_file(conf, conf, "conf")
    # history_list is pickled exactly as it stands at call time: a run's history
    # is only included if the caller appended it before calling this function.
    with open(os.path.join(conf["log_dir"], "history_list.pkl"), 'wb') as f:
        pickle.dump(history_list, f)

    # Evaluate model on the "val" split
    model_evaluation = model.evaluate(ds["val"], verbose=2, steps=conf["steps"]["val"])
    write_to_file(model_evaluation, conf, "evaluate_val")

    # Predict on the materialised validation images and take the arg-max class
    predictions = model.predict(eval_images, verbose=1)
    pred_labels = [np.argmax(pred) for pred in predictions]

    # Classification report
    report = get_classification_report(
            true_labels,
            pred_labels,
            range(conf["num_classes"]),
            target_names=conf["class_names"]
    )
    print (report)
    write_to_file(report, conf, "classification_report")

    # Confusion matrix
    cm = get_confusion_matrix(true_labels, pred_labels)
    plot_confusion_matrix(cm, conf["log_dir"], conf["class_names"], figsize=(12,10), show=False)

## Create training history

In [None]:
start_time = time.time()

# Train one model per entry in weights_list; everything else in conf stays fixed.
for weights in weights_list:
    # Each weight setting gets its own log directory, named after the setting.
    log_dir = "./logs/{}_{}/{}".format(project_time, experiment, str(weights).lower())
    pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)
    conf["log_dir"] = log_dir
    conf["weights"] = weights
    print ("\n\n----------{}----------".format(weights))

    model = create_model(conf)
    callbacks = create_callbacks(conf)

    # NOTE(review): the "test" split is used as validation data during fit, while
    # evaluate_model() scores on the "val" split -- confirm this is intended.
    history = model.fit(
            ds["train"],
            steps_per_epoch = conf["steps"]["train"],
            epochs = conf["num_epochs"],
            validation_data = ds["test"],
            validation_steps = conf["steps"]["test"],
            validation_freq = 1,
            callbacks = callbacks,
            verbose = 1
    )

    # Record this run BEFORE evaluating: evaluate_model() pickles history_list,
    # so appending afterwards left the current run out of history_list.pkl
    # (and the final run was never persisted at all).
    history_list.append(history.history)
    evaluate_model(model, history, ds, conf)

# Report the total wall-clock time of the study
seconds = time.time() - start_time
minutes = seconds/60
hours = minutes/60
print ("Run for {:.2f} hours.".format(hours))

## Get previous results

## Plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's defaults first, then override individual font sizes below.
sns.set()

# Font sizes shared by every figure in this notebook
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 14, 16

# (rc group, keyword settings) pairs, forwarded to plt.rc in this order
for rc_group, rc_settings in (
    ('font',   {'size': SMALL_SIZE}),         # default text sizes
    ('axes',   {'titlesize': BIGGER_SIZE}),   # axes title
    ('axes',   {'labelsize': MEDIUM_SIZE}),   # x and y labels
    ('xtick',  {'labelsize': SMALL_SIZE}),    # x tick labels
    ('ytick',  {'labelsize': SMALL_SIZE}),    # y tick labels
    ('legend', {'fontsize': MEDIUM_SIZE}),    # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),   # figure title
):
    plt.rc(rc_group, **rc_settings)

In [None]:
# Epoch axis for the configured number of epochs. Kept at notebook level because
# later cells read `x`; the curves below use each history's own length instead.
x = range(conf["num_epochs"])

# One legend entry per weight setting, in training order
legend_labels = [str(w).lower() for w in weights_list]

# (history key, y-label, title, y-limits, legend location) for each subplot
val_panels = [
    ('val_sparse_categorical_accuracy', 'Accuracy', 'Validation Accuracy', [0, 1], 'lower right'),
    ('val_loss', 'Loss', 'Validation Loss', [0.0, 3], 'upper right'),
]

# Plot validation accuracy and loss side by side
plt.figure(figsize=(14, 6))
for panel_idx, (metric, ylabel, title, ylim, legend_loc) in enumerate(val_panels, start=1):
    plt.subplot(1, 2, panel_idx)
    for hist in history_list:
        values = hist[metric]
        # Index by the recorded length so a run that stopped early (fewer epochs
        # than conf["num_epochs"]) still plots instead of raising a shape mismatch.
        plt.plot(range(len(values)), values)
    plt.legend(legend_labels, loc=legend_loc)
    plt.ylim(ylim)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.title(title)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists
pathlib.Path('figures').mkdir(parents=True, exist_ok=True)
plt.savefig('figures/model_weights_study_val.pdf', format='pdf')
plt.show()

In [None]:
# Plot training accuracy and loss side by side

# One legend entry per weight setting, in training order
legend_labels = [str(w).lower() for w in weights_list]

# (history key, y-label, title, y-limits, legend location) for each subplot
train_panels = [
    ('sparse_categorical_accuracy', 'Accuracy', 'Training Accuracy', [0, 1], 'lower right'),
    ('loss', 'Loss', 'Training Loss', [0.0, 3], 'upper right'),
]

plt.figure(figsize=(14, 6))
for panel_idx, (metric, ylabel, title, ylim, legend_loc) in enumerate(train_panels, start=1):
    plt.subplot(1, 2, panel_idx)
    for hist in history_list:
        values = hist[metric]
        # Build the epoch axis from the recorded length: this cell no longer needs
        # `x` from the previous cell, and a run that stopped early still plots.
        plt.plot(range(len(values)), values)
    plt.legend(legend_labels, loc=legend_loc)
    plt.ylim(ylim)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.title(title)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists
pathlib.Path('figures').mkdir(parents=True, exist_ok=True)
plt.savefig('figures/model_weights_study_train.pdf', format='pdf')
plt.show()