## Train a model

The objective of this notebook is to train and evaluate the model specified in the parameters file.

In [None]:
# For Development and debugging:
# Reload modules without restarting the kernel
#%load_ext autoreload
#%autoreload 2

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import pandas as pd
pd.options.display.max_columns = None
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import shutil
import json
import math
import time
from datetime import datetime
import logging
import socket

# Set terminal output (to send messages to the terminal stdout).
# This is a handle on the process-level stdout, so these messages go to the
# terminal that launched the notebook instead of to the cell output.
# buffering=1 requests line buffering: without it the stream is block-buffered
# whenever stdout is not a TTY (e.g. redirected to a file by a batch job), and
# the progress messages would only appear once the buffer fills or the handle
# is closed.
terminal_output = open('/dev/stdout', 'w', buffering=1)
print('Execution of Notebook started at {}'.format(datetime.now()), file=terminal_output)

In [None]:
# Locate the directory that holds the project's helper libraries.
# The location depends on the machine running the notebook: the local
# development host or, for any other hostname, the shared storage.
if socket.gethostname() == 'hughes-machine':
    external_libs_path = '/home/hhughes/Documents/Master_Thesis/Project/workspace/libs'
else:
    external_libs_path = '/storage/groups/ml01/code/andres.becker/master_thesis/workspace/libs'
print('External libs path: \n'+external_libs_path, file=terminal_output)

# Fail early with an explicit message rather than with an ImportError below
if not os.path.exists(external_libs_path):
    raise Exception('External library path {} does not exist!'.format(external_libs_path))

# Add the directory to sys.path (at position 1, right after the first entry)
# so the project modules imported next can be found
sys.path.insert(1, external_libs_path)
# Load external libraries
#from Utils import lr_schedule_Callback
#from Utils import save_best_model_Callback
import Utils as utils
from Utils import evaluate_model
import Data_augmentation
from Models_V2 import Individual_Model_Training
# Load the function that prints to the notebook output and to the log file at the same time
from Utils import print_stdout_and_log as printc
from Utils import set_GPU_config as set_GPU_config
import tfds_utils

from Costum_Callbacks import set_tensorboard as set_tensorboard_CB
from Costum_Callbacks import save_best_model_base_on_CMA_Callback as CMA_CB
#from Costum_Callbacks import save_best_model_weights_in_memory_Callback as save_w_and_b
from Costum_Callbacks import print_progress_to_log as train_progress_to_log_CB

Load model parameters:

In [None]:
# Do not touch the value of PARAMETERS_FILE!
# When this notebook is executed with jupyter-nbconvert (from script),
# it will be replaced automatically
#PARAMETERS_FILE = '/home/hhughes/Documents/Master_Thesis/Project/workspace/scripts/Parameters/model_params/local/Quick_test_local.json'
PARAMETERS_FILE = 'dont_touch_me-input_parameters_file'

# Read the JSON parameters; abort right away if the file is missing
if not os.path.exists(PARAMETERS_FILE):
    raise Exception('Parameter file {} does not exist!'.format(PARAMETERS_FILE))
with open(PARAMETERS_FILE) as params_file:
    p = json.load(params_file)

# IMPORTANT
# All outputs are saved using the model name and the name of the parameters file
# For instance, if model='ResNet50V2' and param file='test_1.json', then
# the model will be saved at p['model_path']/ResNet50V2/test_1

# Record where the parameters came from and where the helper libraries live
p.update({
    'parameters_file_path': PARAMETERS_FILE,
    'external_libs_path': external_libs_path,
})
# Check the parameters and set defaults; `info` is the text report returned by
# the helper (it is also written to the log in the logging cell)
p, info = utils.set_model_default_parameters(p)
print(info)
print(p.keys())

Set logging:

In [None]:
# Set logging configuration: records of level INFO and above go to the log
# file named in the parameters; filemode 'w' starts a fresh file on every run
logging.basicConfig(filename=p['log_file'], filemode='w', level=logging.INFO)
logging.info('Parameters loaded from file:\n{}'.format(PARAMETERS_FILE))
# Write the selected parameters into the log
logging.info(info)
# Print the location of the log file into the terminal
msg = 'Log file: '+p['log_file']
print(msg, file=terminal_output)

In [None]:
# Set GPU config
# Forwards the 'disable_gpu' and 'set_memory_growth' parameters to the project
# helper (presumably toggles GPU visibility and incremental memory allocation
# -- TODO confirm in Utils.set_GPU_config)
set_GPU_config(p['disable_gpu'], p['set_memory_growth'])

Create dirs where model output will be saved:

In [None]:
# If you want to avoid cleaning (deleting) model dir, then uncomment the next line:
#p['clean_model_dir'] = 0

# Create the output directories of this run; the helper returns the three
# paths that the rest of the notebook writes to
base_path, model_path, checkpoints_path = utils.create_model_dirs(parameters=p)

# Report the three locations (notebook output and log file)
msg = '\n'.join([
    'Base path:\n{}'.format(base_path),
    'Model path:\n{}'.format(model_path),
    'Checkpoints path:\n{}'.format(checkpoints_path),
])
printc(msg)

# 1.- Dataset

## 1.1.- Load the dataset

In [None]:
# Load the TensorFlow dataset named in the parameters from the local TFDS
# directory, together with its info object (with_info=True).
# as_supervised=True: if it were False, each element would be a dictionary
# with all the features.
tfds_load_kwargs = dict(
    name=p['tf_ds_name'],
    data_dir=p['local_tf_datasets'],
    as_supervised=True,
    shuffle_files=p['shuffle_files'],
    with_info=True,
)
dataset, ds_info = tfds.load(**tfds_load_kwargs)

# Load splits
train_data = dataset['train']
val_data = dataset['validation']

printc('Tensorflow dataset {} loaded from:\n{}'.format(p['tf_ds_name'], p['local_tf_datasets']))

In [None]:
# Show the description text stored in the dataset info
print(ds_info.description)

In [None]:
# Show the splits recorded in the dataset info
ds_info.splits

In [None]:
# Show the feature specification recorded in the dataset info
ds_info.features

In [None]:
# Load TFDS metadata
# The project helper reads it from the dataset's data directory. Later cells
# read the 'channels_df' and 'metadata_df' entries of the returned mapping.
tfds_metadata = tfds_utils.Costum_TFDS_metadata().load_metadata(ds_info.data_dir)
# List the available metadata entries
tfds_metadata.keys()

In [None]:
# Channel table; later cells use its 'name' and 'TFDS_channel_id' columns
tfds_metadata['channels_df']

In [None]:
# Metadata table; later cells use its 'set', 'cell_size_ratio' and
# 'cell_cycle' columns
tfds_metadata['metadata_df']

## 1.2.- Data preprocessing, data augmentation techniques

Before training the network, we select the input channels, apply some linear transformations (90° rotations and horizontal flipping) to augment the **Training** dataset, create the batches and shuffle them. We also perform other operations to improve performance.

**Tune performance**<br>
tf.data.Dataset.prefetch overlaps data preprocessing and model execution while training.
It can be used to decouple the time when data is produced from the time when data is consumed. In particular, the transformation uses a background thread and an internal buffer to prefetch elements from the input dataset ahead of the time they are requested. The number of elements to prefetch should be equal to (or possibly greater than) the number of batches consumed by a single training step. You could either manually tune this value, or set it to **tf.data.experimental.AUTOTUNE** which will prompt the tf.data runtime to tune the value dynamically at runtime.

**Shuffling**<br>
`dataset.shuffle()` randomly shuffles the elements of the dataset.
This dataset fills a buffer with `buffer_size` elements, then randomly samples elements from this buffer, replacing the selected elements with new elements. For perfect shuffling, a buffer size greater than or equal to the full size of the dataset is required.

For instance, if your dataset contains 10,000 elements but buffer_size is set to 1,000, then `shuffle` will initially select a random element from only the first 1,000 elements in the buffer. Once an element is selected, its space in the buffer is replaced by the next (i.e. 1,001-st) element, maintaining the 1,000 element buffer.

**reshuffle_each_iteration** controls whether the shuffle order should be different for each epoch.

### Specify input channels

In [None]:
# Input channels requested in the parameters file
selected_channels = p['input_channels']
msg = 'Selected input channels:\n{}'.format(selected_channels)
logging.info(msg)
print(msg)

# Get selected channel ids: look the channel names up in the channel table
# (indexed by name) and keep their TFDS channel ids as int16
channels_by_name = tfds_metadata['channels_df'].set_index(['name'])
selected_ids = channels_by_name.loc[selected_channels].TFDS_channel_id
input_ids = np.array(selected_ids.values).astype(np.int16)
printc('Corresponding input channel ids:\n{}'.format(input_ids))

n_selected_channels = input_ids.shape[0]
printc('\nNumber of input channels to use in the model:\n{}'.format(n_selected_channels))

### Apply preprocessing and data augmentation

In [None]:
# If the cell size is sampled from a normal distribution, calculate the
# parameters of that distribution using the train set only
if (p['CenterZoom_mode'] == 'random_normal') and p['CenterZoom']:

    # Select the train rows once and reuse them for the statistics and the plot
    mask = (tfds_metadata['metadata_df'].set == 'train')
    train_metadata_df = tfds_metadata['metadata_df'][mask]
    train_ratios = train_metadata_df.cell_size_ratio

    # Cast to built-in floats: p is dumped to parameters.json later in the
    # notebook, and the json module cannot encode NumPy scalars such as
    # np.float32
    p['cell_size_ratio_mean'] = float(train_ratios.mean())
    p['cell_size_ratio_stddev'] = float(train_ratios.std())
    p['cell_size_ratio_low_bound'] = float(train_ratios.min())
    msg = '\nrandom_normal selected as CenterZoom_mode. Distribution parameters:'
    msg += '\nmean: {}, stddev: {}, lower bound: {}'.format(p['cell_size_ratio_mean'], p['cell_size_ratio_stddev'], p['cell_size_ratio_low_bound'])
    printc(msg)

    # Density of the cell size ratio in the train set, one curve per cell
    # cycle phase. `fill=True` replaces the deprecated `shade=True`.
    hue_order = ['G1', 'S', 'G2']
    sns.kdeplot(data=train_metadata_df,
                x='cell_size_ratio',
                hue='cell_cycle',
                hue_order=hue_order,
                fill=True,
                bw_method=0.2
               )
    plt.title('cell_size_ratio distribution')

Take a look into one image and the selected data augmentation techniques:

In [None]:
# Take the first (image, target) element of the validation data; only the
# image is needed for the preview
sample_image, _sample_target = next(iter(val_data))

# Add an extra leading dim so the single image looks like a batch
sample_batch = tf.expand_dims(sample_image, axis=0)
Data_augmentation.visualize_data_augmentation(sample_batch, p)

In [None]:
# Projection tensor used to filter the input channels: it is built from the
# shape of the dataset images and the ids of the selected channels
input_shape = np.array(ds_info.features['image'].shape)
projection_tensor = Data_augmentation.get_projection_tensor(input_shape, input_ids)

# Prepare train and validation datasets.
# NOTE: train_data and val_data are rebound to the prepared datasets, so this
# cell must not be run twice without reloading the dataset first.
train_data, val_data = Data_augmentation.prepare_train_and_val_TFDS(
    train_data, val_data, projection_tensor, p
)

# 3.- Model Selection

Models are selected from a group of predefined architectures through the class `Individual_Model_Training` (in `Models_V2.py`). The name of the selected architecture is specified in the parameter `p['model_name']`.

First we need to init the `Individual_Model_Training` class:

In [None]:
# Model input shape: the dataset image shape with its last axis replaced by
# the number of selected input channels
n_input_channels = input_ids.shape[0]
img_shape = ds_info.features['image'].shape[:-1] + (n_input_channels,)

# Init model class
temp_run = Individual_Model_Training()

# Init model architecture; every setting comes from the parameters file
architecture_kwargs = dict(
    arch_name=p['model_name'],
    input_shape=img_shape,
    conv_reg=p['conv_reg'],
    dense_reg=p['dense_reg'],
    bias_l2_reg=p['bias_l2_reg'],
    pre_training=p['pre_training'],
    return_custom_model=p['custom_model_class'],
)
temp_run.init_model(**architecture_kwargs)

# Select loss function and build the model
temp_run.build_model(loss_name=p['loss'], learning_rate=p['learning_rate'])

### Set model callbacks

In [None]:
# NOTE: every callback is appended to temp_run.callbacks, so running this cell
# twice registers them twice.

# Callback to print the training progress in the log file
train_progress_to_log = train_progress_to_log_CB(p['number_of_epochs'])
temp_run.callbacks.append(train_progress_to_log)

# TensorBoard callback (only when enabled in the parameters)
if p['tensorboard']:
    tensorboard = set_tensorboard_CB(log_path=p['log_path'], log_dir_name=p['model_name'])
    temp_run.callbacks.append(tensorboard)

# Metric watched by both the moving-average callback and early stopping
monitor = 'val_mean_absolute_error'

# Central moving average callback, with the window sizes to track
avg_sizes = [11, 21, 31]
loss_CMA = CMA_CB(monitor, avg_sizes)
temp_run.callbacks.append(loss_CMA)

# Early stop: the weights are NOT restored to the best epoch when it triggers
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor=monitor,
    mode='min',
    patience=p['early_stop_patience'],
    min_delta=0,
    restore_best_weights=False,
    verbose=1,
)
temp_run.callbacks.append(early_stop)

# Print loaded callbacks
printc('Loaded callbacks:\n{}'.format(temp_run.callbacks))

### Fit the model

In [None]:
# Train the model on the prepared train and validation datasets, passing the
# number of epochs and the verbosity level from the parameters file
temp_run.fit_model(train_data, val_data, p['number_of_epochs'], p['verbose_level'])

### Plot Loss

In [None]:
# rename some vars to make it easier
# history: the `.history` attribute of the object stored on temp_run after
# training (presumably the per-epoch values of the loss and the metrics --
# TODO confirm)
history = temp_run.history.history
# metrics: concatenated with ['loss'] when plotting, so it is expected to be
# a list of metric names
metrics = temp_run.metrics

In [None]:
# Plot the training history of the loss and of every entry in `metrics`
utils.plot_train_metrics(history=history, metrics=['loss']+metrics, p=p, figsize=(15,23))

In [None]:
msg = 'Saiving trained model'
logging.info(msg)

# Save history
history_path = os.path.join(base_path, 'history.json')
with open(history_path, 'w') as history_file:
    json.dump(history, history_file, indent=4)

# Save CMA history
# First we need to convert from np.int64 and np.float64 to regular python
# int and float: each stored item is turned into an [int, float] pair
cma_history_json = {
    cma_name: [[int(entry[0]), float(entry[1])] for entry in loss_CMA.CMA_history[cma_name]]
    for cma_name in loss_CMA.CMA_history.keys()
}
cma_history_path = os.path.join(base_path, 'CMA_history.json')
with open(cma_history_path, 'w') as cma_history_file:
    json.dump(cma_history_json, cma_history_file, indent=4)

# Save parameters
parameters_path = os.path.join(base_path, 'parameters.json')
with open(parameters_path, 'w') as parameters_file:
    json.dump(p, parameters_file, indent=4)

In [None]:
# Load history
#path = ''
#with open(os.path.join(path, 'history.json'), 'r') as file:
#    history = json.load(file)
# Load parameters
#with open(os.path.join(base_path, 'parameters.json'), 'r') as file:
#    p = json.load(file)
#metrics = ['mse', 'mean_absolute_error']

# Model evaluation

In [None]:
# Create data frame to save model metrics
# It starts empty; the evaluation sections below add the metric rows of each
# selected model to it
metrics_df = pd.DataFrame()

## 1.- Last model

In [None]:
# Evaluate model
# The model still holds the weights it had when training ended (early
# stopping is configured without restoring the best weights)
model_eval = evaluate_model(p, temp_run.model, projection_tensor, tfds_metadata['metadata_df'])
# Preview the first rows of the evaluation targets table
model_eval.targets_df.head()

In [None]:
# Diagnostic plots restricted to the train and validation sets
sets = ['train', 'val']
diagnostic_plots = [
    (model_eval.plot_error_dist, (18, 6)),  # error distribution
    (model_eval.plot_y_dist, (15, 7)),      # y and y_hat distribution
    (model_eval.plot_residuals, (10, 7)),   # residuals
]
for plot_fn, plot_figsize in diagnostic_plots:
    plot_fn(figsize=plot_figsize, sets=sets)
# Target vs predicted (kept as the last expression of the cell)
model_eval.plot_y_vs_y_hat(figsize=(7.6, 7), sets=sets)

In [None]:
# Compute the metrics of the last model (not added to metrics_df)
model_eval.get_metrics()
# Show every row except the ones of the test set
is_test_row = model_eval.metrics_df.Set == 'test'
model_eval.metrics_df.loc[~is_test_row]

## 2.- Best model with no Central Moving Average (CMA_0)

In [None]:
eval_name = 'CMA_0'
# Load the weights stored by the CMA callback for this entry (position 3)
best_weights = loss_CMA.best_models[eval_name][3]
temp_run.model.set_weights(best_weights)
# Save the model in its own directory under the model path
export_dir = os.path.join(model_path, eval_name)
temp_run.model.save(export_dir)
# Evaluate the model with the loaded weights
model_eval = evaluate_model(p, temp_run.model, projection_tensor, tfds_metadata['metadata_df'])
# Save model data (y_hat values and metrics)
model_eval.save_model_evaluation_data(base_path, eval_name=eval_name)
model_eval.targets_df.head()

In [None]:
# Training history of the mean absolute error
utils.plot_train_metrics(
    history=history,
    metrics=['mean_absolute_error'],
    p=p,
    figsize=(15, 23),
)
# Diagnostic plots restricted to the train and validation sets
sets = ['train', 'val']
diagnostic_plots = [
    (model_eval.plot_error_dist, (18, 6)),  # error distribution
    (model_eval.plot_y_dist, (15, 7)),      # y and y_hat distribution
    (model_eval.plot_residuals, (10, 7)),   # residuals
]
for plot_fn, plot_figsize in diagnostic_plots:
    plot_fn(figsize=plot_figsize, sets=sets)
# Target vs predicted (kept as the last expression of the cell)
model_eval.plot_y_vs_y_hat(figsize=(7.6, 7), sets=sets)

In [None]:
# Entry stored by the CMA callback for this model; positions 0, 1 and 2 are
# passed on as Epoch, CMA and CMA_Std respectively
best_model = loss_CMA.best_models[eval_name]
model_eval.get_metrics(CMA_size=0,
                       CMA=best_model[1],
                       CMA_Std=best_model[2],
                       Epoch=best_model[0])
# Add this model's metrics to the comparison table. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
# NOTE: re-running this cell adds the same rows again.
metrics_df = pd.concat([metrics_df, model_eval.metrics_df], ignore_index=True)
# Show every row except the ones of the test set
mask = model_eval.metrics_df.Set == 'test'
model_eval.metrics_df[~mask]

## 3.- Best model wrt Central Moving Average of size 11 (CMA_11)

In [None]:
eval_name = 'CMA_11'
# Load the weights stored by the CMA callback for this entry (position 3)
best_weights = loss_CMA.best_models[eval_name][3]
temp_run.model.set_weights(best_weights)
# Save the model in its own directory under the model path
export_dir = os.path.join(model_path, eval_name)
temp_run.model.save(export_dir)
# Evaluate the model with the loaded weights
model_eval = evaluate_model(p, temp_run.model, projection_tensor, tfds_metadata['metadata_df'])
# Save model data (y_hat values and metrics)
model_eval.save_model_evaluation_data(base_path, eval_name=eval_name)
model_eval.targets_df.head()

In [None]:
# Training history of the mean absolute error, passing the CMA history of
# the model selected in the previous cell
utils.plot_train_metrics(
    history=history,
    CMA_history=loss_CMA.CMA_history[eval_name],
    CMA_metric='mean_absolute_error',
    metrics=['mean_absolute_error'],
    p=p,
    title=eval_name,
    figsize=(15, 23),
)
# Diagnostic plots restricted to the train and validation sets
sets = ['train', 'val']
diagnostic_plots = [
    (model_eval.plot_error_dist, (18, 6)),  # error distribution
    (model_eval.plot_y_dist, (15, 7)),      # y and y_hat distribution
    (model_eval.plot_residuals, (10, 7)),   # residuals
]
for plot_fn, plot_figsize in diagnostic_plots:
    plot_fn(figsize=plot_figsize, sets=sets)
# Target vs predicted (kept as the last expression of the cell)
model_eval.plot_y_vs_y_hat(figsize=(7.6, 7), sets=sets)

In [None]:
# Entry stored by the CMA callback for this model; positions 0, 1 and 2 are
# passed on as Epoch, CMA and CMA_Std respectively
best_model = loss_CMA.best_models[eval_name]
model_eval.get_metrics(CMA_size=11,
                       CMA=best_model[1],
                       CMA_Std=best_model[2],
                       Epoch=best_model[0])
# Add this model's metrics to the comparison table. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
# NOTE: re-running this cell adds the same rows again.
metrics_df = pd.concat([metrics_df, model_eval.metrics_df], ignore_index=True)
# Show every row except the ones of the test set
mask = model_eval.metrics_df.Set == 'test'
model_eval.metrics_df[~mask]

## 4.- Best model wrt Central Moving Average of size 21 (CMA_21)

In [None]:
eval_name = 'CMA_21'
# Load the weights stored by the CMA callback for this entry (position 3)
best_weights = loss_CMA.best_models[eval_name][3]
temp_run.model.set_weights(best_weights)
# Save the model in its own directory under the model path
export_dir = os.path.join(model_path, eval_name)
temp_run.model.save(export_dir)
# Evaluate the model with the loaded weights
model_eval = evaluate_model(p, temp_run.model, projection_tensor, tfds_metadata['metadata_df'])
# Save model data (y_hat values and metrics)
model_eval.save_model_evaluation_data(base_path, eval_name=eval_name)
model_eval.targets_df.head()

In [None]:
# Training history of the mean absolute error, passing the CMA history of
# the model selected in the previous cell
utils.plot_train_metrics(
    history=history,
    CMA_history=loss_CMA.CMA_history[eval_name],
    CMA_metric='mean_absolute_error',
    metrics=['mean_absolute_error'],
    p=p,
    title=eval_name,
    figsize=(15, 23),
)
# Diagnostic plots restricted to the train and validation sets
sets = ['train', 'val']
diagnostic_plots = [
    (model_eval.plot_error_dist, (18, 6)),  # error distribution
    (model_eval.plot_y_dist, (15, 7)),      # y and y_hat distribution
    (model_eval.plot_residuals, (10, 7)),   # residuals
]
for plot_fn, plot_figsize in diagnostic_plots:
    plot_fn(figsize=plot_figsize, sets=sets)
# Target vs predicted (kept as the last expression of the cell)
model_eval.plot_y_vs_y_hat(figsize=(7.6, 7), sets=sets)

In [None]:
# Entry stored by the CMA callback for this model; positions 0, 1 and 2 are
# passed on as Epoch, CMA and CMA_Std respectively
best_model = loss_CMA.best_models[eval_name]
model_eval.get_metrics(CMA_size=21,
                       CMA=best_model[1],
                       CMA_Std=best_model[2],
                       Epoch=best_model[0])
# Add this model's metrics to the comparison table. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
# NOTE: re-running this cell adds the same rows again.
metrics_df = pd.concat([metrics_df, model_eval.metrics_df], ignore_index=True)
# Show every row except the ones of the test set
mask = model_eval.metrics_df.Set == 'test'
model_eval.metrics_df[~mask]

## 5.- Best model wrt Central Moving Average of size 31 (CMA_31)

In [None]:
eval_name = 'CMA_31'
# Load the weights stored by the CMA callback for this entry (position 3)
best_weights = loss_CMA.best_models[eval_name][3]
temp_run.model.set_weights(best_weights)
# Save the model in its own directory under the model path
export_dir = os.path.join(model_path, eval_name)
temp_run.model.save(export_dir)
# Evaluate the model with the loaded weights
model_eval = evaluate_model(p, temp_run.model, projection_tensor, tfds_metadata['metadata_df'])
# Save model data (y_hat values and metrics)
model_eval.save_model_evaluation_data(base_path, eval_name=eval_name)
model_eval.targets_df.head()

In [None]:
# Training history of the mean absolute error, passing the CMA history of
# the model selected in the previous cell
utils.plot_train_metrics(
    history=history,
    CMA_history=loss_CMA.CMA_history[eval_name],
    CMA_metric='mean_absolute_error',
    metrics=['mean_absolute_error'],
    p=p,
    title=eval_name,
    figsize=(15, 23),
)
# Diagnostic plots restricted to the train and validation sets
sets = ['train', 'val']
diagnostic_plots = [
    (model_eval.plot_error_dist, (18, 6)),  # error distribution
    (model_eval.plot_y_dist, (15, 7)),      # y and y_hat distribution
    (model_eval.plot_residuals, (10, 7)),   # residuals
]
for plot_fn, plot_figsize in diagnostic_plots:
    plot_fn(figsize=plot_figsize, sets=sets)
# Target vs predicted (kept as the last expression of the cell)
model_eval.plot_y_vs_y_hat(figsize=(7.6, 7), sets=sets)

In [None]:
# Entry stored by the CMA callback for this model; positions 0, 1 and 2 are
# passed on as Epoch, CMA and CMA_Std respectively
best_model = loss_CMA.best_models[eval_name]
model_eval.get_metrics(CMA_size=31,
                       CMA=best_model[1],
                       CMA_Std=best_model[2],
                       Epoch=best_model[0])
# Add this model's metrics to the comparison table. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
# NOTE: re-running this cell adds the same rows again.
metrics_df = pd.concat([metrics_df, model_eval.metrics_df], ignore_index=True)
# Show every row except the ones of the test set
mask = model_eval.metrics_df.Set == 'test'
model_eval.metrics_df[~mask]

# Compare metrics

In [None]:
# Metrics of all the evaluated models, leaving out the test-set rows
is_test_row = metrics_df.Set == 'test'
metrics_df.loc[~is_test_row]

In [None]:
# Validation-set rows only, sorted by MAE, then Bias, then Std (ascending)
val_metrics_df = metrics_df[metrics_df.Set == 'val']
val_metrics_df.sort_values(by=['MAE', 'Bias', 'Std'])

In [None]:
# Save metrics next to the other outputs of this run
metrics_csv_path = os.path.join(base_path, 'metrics.csv')
with open(metrics_csv_path, 'w') as metrics_csv:
    metrics_df.to_csv(metrics_csv, index=False)

# Save a copy into a common dir to allow comparison among models; the file
# is named after p['basename']
common_metrics_dir = os.path.join(p['model_path'], 'Model_Metrics_RI_2')
os.makedirs(common_metrics_dir, exist_ok=True)
common_csv_path = os.path.join(common_metrics_dir, p['basename']+'.csv')
with open(common_csv_path, 'w') as common_csv:
    metrics_df.to_csv(common_csv, index=False)

In [None]:
# Mark the end of the run in the log file
logging.info('Notebook execution finished!')