In [1]:
import tensorflow.keras.backend as K
import numpy as np
import pandas as pd
import json
import multiprocessing
import matplotlib.pyplot as plt

from glob import glob
from copy import deepcopy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

2022-05-11 10:51:45.632782: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Number of CPUs in the system; passed as `workers` to `fit` in the training loop below.
n_cores = multiprocessing.cpu_count()
n_cores

20

In [3]:
from fl_tissue_model_tools import data_prep, dev_config, models, defs
import fl_tissue_model_tools.preprocessing as prep

In [4]:
# Load the project directory configuration from the dev paths file.
# `dirs.data_dir` and `dirs.analysis_dir` are read below to build data and output paths.
dirs = dev_config.get_dev_directories("../../dev_paths.txt")

# Set up model training parameters

In [5]:
# Read the shared training configuration for the invasion-depth model.
with open("../../model_training/invasion_depth_training_values.json", 'r') as values_file:
    training_values = json.load(values_file)

# The JSON file encodes "no seed" as the string "None"; turn that into a real None.
if training_values["rs_seed"] == "None":
    training_values["rs_seed"] = None

In [6]:
# Display the loaded training configuration for the record.
training_values

{'batch_size': 32,
 'frozen_epochs': 50,
 'fine_tune_epochs': 50,
 'val_split': 0.2,
 'early_stopping_patience': 25,
 'early_stopping_min_delta': 0.0001,
 'rs_seed': None,
 'resnet_inp_shape': [128, 128, 3],
 'class_labels': {'no_invasion': 0, 'invasion': 1},
 'cls_thresh': 0.5,
 'n_models': 5}

In [1]:
# Read the best hyperparameters found for the invasion-depth model.
# Needs `json` from the import cell at the top, so run the notebook top to bottom.
with open("../../model_training/invasion_depth_best_hp.json", 'r') as hp_file:
    best_hp = json.load(hp_file)

NameError: name 'json' is not defined

In [8]:
# Display the loaded hyperparameters for the record.
best_hp

{'adam_beta_1': 0.8690806056354333,
 'adam_beta_2': 0.9817045929249536,
 'fine_tune_lr': 4.837200625807212e-05,
 'frozen_lr': 0.00023296415902195254,
 'last_resnet_layer': 'conv5_block1_out'}

In [9]:
### Data paths ###
# Images are read from the data directory; every training artifact is written
# beneath the analysis directory.
root_data_path = f"{dirs.data_dir}/invasion_data/"
model_training_path = f"{dirs.analysis_dir}/resnet50_invasion_model"
best_ensemble_training_path = f"{model_training_path}/best_ensemble"


### Settings taken from the training-values JSON ###
seed = training_values["rs_seed"]
val_split = training_values["val_split"]
batch_size = training_values["batch_size"]
n_models = training_values["n_models"]
class_labels = training_values["class_labels"]
# JSON stores the input shape as a list; convert it to a tuple
resnet_inp_shape = tuple(training_values["resnet_inp_shape"])
# For a quick smoke test, temporarily override both epoch counts (e.g. with 5)
frozen_epochs, fine_tune_epochs = training_values["frozen_epochs"], training_values["fine_tune_epochs"]
# Binary classification -> a single output unit is enough
n_outputs = 1


### Settings taken from the best-hyperparameter JSON ###
adam_beta_1, adam_beta_2 = best_hp["adam_beta_1"], best_hp["adam_beta_2"]
frozen_lr, fine_tune_lr = best_hp["frozen_lr"], best_hp["fine_tune_lr"]
last_resnet_layer = best_hp["last_resnet_layer"]


### Early stopping ###
es_criterion, es_mode = "val_loss", "min"
# Tune patience / min_delta according to how serious the experiment is
es_patience = training_values["early_stopping_patience"]
es_min_delta = training_values["early_stopping_min_delta"]


### Model saving ###
mcp_criterion, mcp_mode = "val_loss", "min"
mcp_best_only = True
# Must stay True, otherwise the base model "layer" won't save/load properly
mcp_weights_only = True

In [10]:
# Prepare the output directory that the training loop writes weights and history CSVs into.
# NOTE(review): presumably a no-op when the directory already exists — confirm in data_prep.make_dir.
data_prep.make_dir(best_ensemble_training_path)

In [11]:
# Single random state shared by the path shuffling and both data generators in the training loop.
# With seed=None (the value shown in training_values above) runs are not reproducible.
rs = np.random.RandomState(seed)

# Train ensemble of models using best hyperparameters

In [12]:
def _fit_stage(model, train_gen, val_gen, learning_rate, n_epochs, weights_file):
    """Compile `model` and train it for one stage (frozen or fine-tune).

    Uses the notebook-level Adam betas, early-stopping settings and checkpoint
    settings. The best weights (according to `mcp_criterion`) are written to
    `weights_file`.

    Args:
        model: Keras model to (re)compile and fit.
        train_gen: Training data generator.
        val_gen: Validation data generator.
        learning_rate: Adam learning rate for this stage.
        n_epochs: Maximum number of epochs (early stopping may end sooner).
        weights_file: Path the checkpoint callback saves weights to.

    Returns:
        The History object returned by `model.fit`.
    """
    model.compile(
        optimizer=Adam(learning_rate=learning_rate, beta_1=adam_beta_1, beta_2=adam_beta_2),
        loss=BinaryCrossentropy(),
        weighted_metrics=[BinaryAccuracy()]
    )
    # Fresh callbacks per stage so patience / best-value tracking starts from scratch
    es_callback = EarlyStopping(monitor=es_criterion, mode=es_mode, min_delta=es_min_delta, patience=es_patience)
    mcp_callback = ModelCheckpoint(weights_file, monitor=mcp_criterion, mode=mcp_mode, save_best_only=mcp_best_only, save_weights_only=mcp_weights_only)
    return model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=n_epochs,
        callbacks=[es_callback, mcp_callback],
        workers=n_cores
    )


# List the training images once. glob returns files in filesystem-dependent order,
# so sort them: with a fixed seed the shuffles below then give the same splits on
# every machine and every run.
all_class_paths = {v: sorted(glob(f"{root_data_path}/train/{k}/*.tif")) for k, v in class_labels.items()}
empty_classes = [k for k, v in class_labels.items() if not all_class_paths[v]]
if empty_classes:
    # Fail early with a clear message instead of deep inside the generator / fit
    raise FileNotFoundError(
        f"No .tif training images found under {root_data_path}/train/ for class(es): {empty_classes}"
    )

for model_idx in range(n_models):
    print(f"Training model {model_idx}...")
    ### Prepare data (each model should be trained on a randomly assigned train/validation set) ###
    # Training & validation data (drawn from same image set & randomly assigned).
    # Copy the sorted lists so every model shuffles from the same starting order.
    tv_class_paths = {label: list(paths) for label, paths in all_class_paths.items()}
    mcp_best_frozen_weights_file = f"{best_ensemble_training_path}/best_frozen_weights_{model_idx}.h5"
    mcp_best_finetune_weights_file = f"{best_ensemble_training_path}/best_finetune_weights_{model_idx}.h5"

    for paths in tv_class_paths.values():
        rs.shuffle(paths)

    train_data_paths, val_data_paths = data_prep.get_train_val_split(tv_class_paths, val_split=val_split)


    ### Build datasets ###
    train_datagen = data_prep.InvasionDataGenerator(
        train_data_paths,
        class_labels,
        batch_size,
        resnet_inp_shape[:2],
        rs,
        class_weights=True,
        shuffle=True,
        augmentation_function=prep.augment_imgs
    )
    # Validation reuses the class weights computed by the training generator.
    # NOTE(review): validation also receives the training augmentation function and
    # shuffle=True. If the generator augments every batch, val_loss (which drives
    # early stopping and checkpointing) is measured on randomly augmented images —
    # confirm this is intended.
    val_datagen = data_prep.InvasionDataGenerator(
        val_data_paths,
        class_labels,
        batch_size,
        resnet_inp_shape[:2],
        rs,
        class_weights=train_datagen.class_weights,
        shuffle=True,
        augmentation_function=train_datagen.augmentation_function
    )


    ### Build model ###
    # Drop graph/session state left over from the previous ensemble member
    K.clear_session()
    tl_model = models.build_ResNet50_TL(
        n_outputs,
        resnet_inp_shape,
        base_last_layer=last_resnet_layer,
        base_model_trainable=False
    )


    ### Frozen training ###
    h1 = _fit_stage(tl_model, train_datagen, val_datagen, frozen_lr, frozen_epochs, mcp_best_frozen_weights_file)


    ### Finetune training ###
    # Load best frozen weights before fine tuning
    tl_model.load_weights(mcp_best_frozen_weights_file)
    # Make base model trainable
    models.toggle_TL_freeze(tl_model)
    h2 = _fit_stage(tl_model, train_datagen, val_datagen, fine_tune_lr, fine_tune_epochs, mcp_best_finetune_weights_file)


    ### Save results for comparison later ###
    h1_df = pd.DataFrame(h1.history)
    h1_df["training_stage"] = ["frozen"] * len(h1.epoch)

    h2_df = pd.DataFrame(h2.history)
    h2_df["training_stage"] = ["finetune"] * len(h2.epoch)

    # One CSV per ensemble member: frozen-stage rows followed by fine-tune rows
    h_df = pd.concat([h1_df, h2_df], axis=0, ignore_index=True)

    h_df.to_csv(f"{best_ensemble_training_path}/best_model_history_{model_idx}.csv", index=False)

Training model 0...


2022-05-11 10:52:28.947517: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-05-11 10:52:28.950763: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-05-11 10:52:29.467322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:05:00.0 name: Tesla M60 computeCapability: 5.2
coreClock: 1.1775GHz coreCount: 16 deviceMemorySize: 7.94GiB deviceMemoryBandwidth: 149.31GiB/s
2022-05-11 10:52:29.467376: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-05-11 10:52:29.472969: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-05-11 10:52:29.473037: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2022-05-11 

Epoch 1/50


2022-05-11 10:52:35.397685: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-05-11 10:52:36.853337: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2022-05-11 10:52:38.494508: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2022-05-11 10:52:38.603140: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Training model 1...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoc

# Look at history

In [26]:
# For every ensemble member, report the best per-metric values reached during
# the fine-tuning stage, read back from the history CSV saved by the training loop.
for model_idx in range(n_models):
    print(f"Model {model_idx} results: ")
    h_df = pd.read_csv(f"{best_ensemble_training_path}/best_model_history_{model_idx}.csv")
    # Keep only the rows recorded while the base model was trainable
    ft_h_df = h_df[h_df["training_stage"] == "finetune"]
    best_values = (
        ("Best train loss", ft_h_df["loss"].min()),
        ("Best train acc", ft_h_df["binary_accuracy"].max()),
        ("Best val loss", ft_h_df["val_loss"].min()),
        ("Best val acc", ft_h_df["val_binary_accuracy"].max()),
    )
    for label, value in best_values:
        print(f"{label}: {value}")
    print()

Model 0 results: 
Best train loss: 0.1112985089421272
Best train acc: 0.950018048286438
Best val loss: 0.3064514994621277
Best val acc: 0.8875739574432373

Model 1 results: 
Best train loss: 0.062589481472969
Best train acc: 0.9745664596557616
Best val loss: 0.2852932214736938
Best val acc: 0.8526936769485474

Model 2 results: 
Best train loss: 0.1044299378991127
Best train acc: 0.9564787745475768
Best val loss: 0.3060031831264496
Best val acc: 0.8758629560470581

Model 3 results: 
Best train loss: 0.1337691843509674
Best train acc: 0.94807630777359
Best val loss: 0.2941640615463257
Best val acc: 0.8622913360595703

Model 4 results: 
Best train loss: 0.0652048736810684
Best train acc: 0.9736419916152954
Best val loss: 0.3436527848243713
Best val acc: 0.8776894807815552

