In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import numpy as np
import datetime
import time
import os
import pathlib
import matplotlib.pyplot as plt
import pickle

# Some stuff to make utils-function work
import sys
sys.path.append('../utils')
from pipeline import create_dataset, split_and_create_dataset, prepare_for_training
from utils import show_image, class_distribution
from utils import print_split_info, unpipe, tf_bincount, checkout_dataset
%load_ext autoreload
%autoreload 2


from create_model import create_model
from create_model import create_callbacks
from utils import write_to_file
from utils import get_class_weights

# Jupyter-specific
%matplotlib inline

# Timestamp (YYYYMMDD-HHMMSS) captured once when this cell runs.
project_time = "{:%Y%m%d-%H%M%S}".format(datetime.datetime.now())

In [None]:
# Dataset locations. The defaults are the paths this notebook was written
# against; set HYPER_KVASIR_LABELED_DIR / HYPER_KVASIR_UNLABELED_DIR in the
# environment to run on another machine without editing this cell.
data_dir = pathlib.Path(os.environ.get(
    'HYPER_KVASIR_LABELED_DIR',
    '/home/henriklg/master-thesis/data/hyper-kvasir/labeled_ttv/'))
unlab_dir = pathlib.Path(os.environ.get(
    'HYPER_KVASIR_UNLABELED_DIR',
    '/home/henriklg/master-thesis/data/hyper-kvasir/unlabeled_ttv/'))

# Initial model / experiment tag; together with the run timestamp they form
# the log directory: ./logs/<timestamp><experiment>/<model_name>
model_name = "EfficientNetB0"
experiment = "_size_study_noweights"
log_dir = "./logs/{}{}/{}".format(project_time, experiment, model_name)

# Single configuration dict handed to the project helpers
# (create_dataset, create_model, create_callbacks, write_to_file).
conf = {
    # Dataset
    "data_dir": data_dir,
    "unlab_dir": unlab_dir,
    "log_dir": log_dir,
    "cache_dir": "./cache",
    "ds_info": 'hypkva',
    "augment": ["crop","flip","brightness","saturation","contrast","rotate"],
    "aug_mult": 0.5,
    "resample": True,
    "class_weight": False,
    "shuffle_buffer_size": 2000,        # 0=no shuffling
    "seed": 2511,
    "neg_class": None,                 # select neg class for binary ds (normal class)
    "outcast": None,                   # list of folders to drop - currently only works for 1 item
    # Model
    "model_name": model_name,
    "model": model_name,               # kept in sync with model_name instead of repeating the literal
    "weights": None,                   # which weights to initialize the model with
    "dropout": 0.2,
    "num_epochs": 40,
    "batch_size": 16,
    "img_shape": (128, 128, 3),
    "learning_rate": 0.001,
    "optimizer": 'Adam',
    "final_activation": 'softmax',     # sigmoid for binary ds
    # Callbacks
    "tensorboard": False,
    "learning_schedule": False,
    "decay_rate": 0,                   # 128:0.25   64:1.0   32:4.0   16:16   8:64
    "checkpoint": False,
    "early_stopp": False,
    "early_stopp_patience": 7,
    # Misc
    "verbosity": 0,
    "keep_threshold": 0.0
    }

In [None]:
# Build the dataset splits from the configuration.
# NOTE(review): later cells read conf["steps"], which the config cell does not
# define - presumably create_dataset adds it to conf; confirm in pipeline.py.
ds = create_dataset(conf)

# One Keras history dict per trained model, filled by the training cell.
history_list = []

# Model variants compared in this study, in increasing B-index order.
models = ["EfficientNetB{}".format(index) for index in (0, 2, 4, 6)]

## Create training history

In [None]:
# Train every entry of `models` on the same dataset and collect the Keras
# history dict of each run for the plots further down.
start_time = time.time()

# Start from an empty list so that re-running this cell does not stack a
# second set of histories on top of the first (which would misalign the
# curves with the `models` legend when plotting).
history_list = []

for model_name in models:
    # Each model gets its own log directory under the shared run prefix.
    log_dir = "./logs/{}{}/{}".format(project_time, experiment, model_name)
    pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)

    # Point the shared config at the current model before building it.
    conf["model_name"] = model_name
    conf["log_dir"] = log_dir
    conf["model"] = model_name
    print ("\n\n----------{}----------".format(conf["model"]))

    model = create_model(conf)
    callbacks = create_callbacks(conf)

    # NOTE(review): the "test" split is passed as validation data here -
    # confirm this is intended rather than a dedicated validation split.
    history = model.fit(
            ds["train"],
            steps_per_epoch = conf["steps"]["train"],
            epochs = conf["num_epochs"],
            validation_data = ds["test"],
            validation_steps = conf["steps"]["test"],
            validation_freq = 1,
            callbacks = callbacks,
            verbose = 1
    )

    # Save the metrics from training
    write_to_file(history.history, conf, "history")
    write_to_file(conf, conf, "conf")
    history_list.append(history.history)
    # The cumulative list is re-dumped into every model's log dir, so the
    # directory of the last model holds the histories of all models.
    with open(os.path.join(conf["log_dir"], "history_list.pkl"), 'wb') as f:
        pickle.dump(history_list, f)

# Report total wall-clock time for the whole sweep.
seconds = time.time() - start_time
minutes = seconds/60
hours = minutes/60
print ("Run for {:.2f} hours.".format(hours))

## Get previous results

## Plot

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Font sizes (points) shared by all figures in this notebook.
SMALL_SIZE = 12
MEDIUM_SIZE = 14
BIGGER_SIZE = 16

# (rc group, settings) pairs, applied in order through plt.rc
rc_font_settings = (
    ('font',   {'size': SMALL_SIZE}),         # default text size
    ('axes',   {'titlesize': BIGGER_SIZE}),   # axes title
    ('axes',   {'labelsize': MEDIUM_SIZE}),   # x and y axis labels
    ('xtick',  {'labelsize': SMALL_SIZE}),    # x tick labels
    ('ytick',  {'labelsize': SMALL_SIZE}),    # y tick labels
    ('legend', {'fontsize': MEDIUM_SIZE}),    # legend entries
    ('figure', {'titlesize': BIGGER_SIZE}),   # figure title
)
for rc_group, rc_kwargs in rc_font_settings:
    plt.rc(rc_group, **rc_kwargs)

In [None]:
# Compare the validation curves of all trained models side by side:
# accuracy on the left, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# (axes, history key, y label, title, y limits, legend position) per panel
panels = (
    (ax_acc, 'val_sparse_categorical_accuracy', 'Accuracy',
     'Validation Accuracy', [0, 1], 'lower right'),
    (ax_loss, 'val_loss', 'Loss',
     'Validation Loss', [0.0, 3], 'upper right'),
)

for ax, metric, ylabel, title, ylim, legend_loc in panels:
    for hist in history_list:
        values = hist[metric]
        # Use each curve's own length for the x-axis: a run that stopped
        # before conf["num_epochs"] would otherwise fail with an x/y
        # shape mismatch.
        ax.plot(range(len(values)), values)
    ax.legend(models, loc=legend_loc)
    ax.set_ylim(ylim)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_title(title)

fig.tight_layout()

# Make sure the output directory exists before saving the figure.
figure_path = pathlib.Path('figures') / 'model_size_study_val.pdf'
figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(str(figure_path), format='pdf')
plt.show()

In [None]:
# Final-epoch validation accuracy: first trained model minus last trained model.
first_val_acc = history_list[0]['val_sparse_categorical_accuracy'][-1]
last_val_acc = history_list[-1]['val_sparse_categorical_accuracy'][-1]
first_val_acc - last_val_acc

In [None]:
# Final-epoch training accuracy: first trained model minus last trained model.
first_train_acc = history_list[0]['sparse_categorical_accuracy'][-1]
last_train_acc = history_list[-1]['sparse_categorical_accuracy'][-1]
first_train_acc - last_train_acc