In [1]:
import numpy as np
import dask as d
import cv2
import matplotlib.pyplot as plt
import keras_tuner as kt
import json
import multiprocessing
import os

from glob import glob
from tensorflow import data
from tensorflow.keras import Sequential, Input, Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import resnet50

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
from fl_tissue_model_tools import data_prep, dev_config, models, defs
import fl_tissue_model_tools.preprocessing as prep

In [3]:
# Number of CPUs reported by the OS; passed later as `workers` to tuner.search().
n_cores = multiprocessing.cpu_count()
n_cores

16

In [4]:
# Resolve machine-specific directories from the developer paths file.
# Only `dirs.data_dir` and `dirs.analysis_dir` are used below.
dirs = dev_config.get_dev_directories("../dev_paths.txt")

# Set up model training parameters

In [5]:
# Load the general training settings (batch size, epoch counts, validation
# split, early-stopping values and random seed) from the shared JSON file.
with open("../model_training/invasion_depth_training_values.json", "r") as values_file:
    training_values = json.load(values_file)

# The JSON file spells "no seed" as the string "None"; turn it into a real None.
if training_values["rs_seed"] == "None":
    training_values["rs_seed"] = None

In [6]:
# Show the loaded training settings so the run is self-documenting.
training_values

{'batch_size': 32,
 'frozen_epochs': 50,
 'fine_tune_epochs': 50,
 'val_split': 0.2,
 'early_stopping_patience': 25,
 'early_stopping_min_delta': 0.0001,
 'rs_seed': None}

In [7]:
# Load the hyperparameter search space (value ranges and trial budget) from JSON.
with open("../model_training/invasion_depth_hp_space.json", "r") as space_file:
    hp_search_space = json.load(space_file)

In [8]:
# Show the loaded search space so the run is self-documenting.
hp_search_space

{'adam_beta_1_range': [0.8, 0.99],
 'adam_beta_2_range': [0.98, 0.999],
 'frozen_lr_range': [0.0001, 0.01],
 'fine_tune_lr_range': [1e-05, 0.001],
 'last_layer_options': ['conv5_block3_out',
  'conv5_block2_out',
  'conv5_block1_out',
  'conv4_block6_out',
  'conv4_block5_out',
  'conv4_block4_out',
  'conv4_block3_out',
  'conv4_block2_out',
  'conv4_block1_out'],
 'num_initial_points': 25,
 'max_opt_trials': 50}

In [9]:
### Data paths ###
# Images are read from the data directory; every search artifact is written
# below the analysis directory.
root_data_path = f"{dirs.data_dir}/invasion_data/"
model_training_path = f"{dirs.analysis_dir}/resnet50_invasion_model"

# Names used by Keras Tuner / the hypermodel
project_name = "invasion_hp_trials"
hypermodel_name = "invasion_depth_hypermodel"

# Output locations for the search results
hp_search_hp_path = f"{model_training_path}/hyperparameter_search_hps"
hp_search_weights_path = f"{model_training_path}/hyperparameter_search_weights"
best_hp_file = f"{hp_search_hp_path}/best_hyperparams_v1.json"
mcp_best_frozen_weights_file = f"{hp_search_weights_path}/best_frozen_weights.h5"


### General training parameters ###
resnet_inp_shape = (128, 128, 3)
class_labels = dict(no_invasion=0, invasion=1)
# A single output unit is enough for binary classification
n_outputs = 1

# Pulled straight from the training-values JSON. For a quick smoke test,
# temporarily override frozen_epochs / fine_tune_epochs with a small value (e.g. 5).
seed, val_split, batch_size, frozen_epochs, fine_tune_epochs = (
    training_values[key]
    for key in ("rs_seed", "val_split", "batch_size", "frozen_epochs", "fine_tune_epochs")
)


### Early stopping ###
es_criterion, es_mode = "val_loss", "min"
# Tune these to match how serious the experiment is
es_patience = training_values["early_stopping_patience"]
es_min_delta = training_values["early_stopping_min_delta"]


### Frozen model saving (for transitioning from frozen model to fine-tuned model) ###
mcp_criterion, mcp_mode = "val_loss", "min"


### Hyperparameter search ###
# Ranges arrive from JSON as lists; convert each one to a tuple.
adam_beta_1_range, adam_beta_2_range, frozen_lr_range, fine_tune_lr_range = (
    tuple(hp_search_space[key])
    for key in ("adam_beta_1_range", "adam_beta_2_range", "frozen_lr_range", "fine_tune_lr_range")
)
last_layer_options = hp_search_space["last_layer_options"]
# Trial budget. For a quick smoke test, temporarily override these with small
# values (e.g. num_initial_points = 3, max_opt_trials = 5).
num_initial_points = hp_search_space["num_initial_points"]
max_opt_trials = hp_search_space["max_opt_trials"]

In [10]:
# Make sure both output folders (best hyperparameters, checkpoint weights) exist.
for _output_dir in (hp_search_hp_path, hp_search_weights_path):
    data_prep.make_dir(_output_dir)

# Prep for loading data

In [11]:
# One RandomState shared by the path shuffle and both data generators below.
# With seed=None (the value in the training-values JSON shown above) NumPy
# seeds it from OS entropy, so runs are not repeatable.
rs = np.random.RandomState(seed)

In [12]:
# Training & validation data (drawn from same image set & randomly assigned).
# Maps each integer class label to the .tif paths found under train/<class name>/.
# glob() returns matches in a file-system-dependent order, so the paths are
# sorted first; otherwise a seeded shuffle would still produce a different
# split on a different machine or file system.
tv_class_paths = {
    v: sorted(glob(f"{root_data_path}/train/{k}/*.tif"))
    for k, v in class_labels.items()
}
# Shuffle each class's path list in place with the shared RandomState.
for class_paths in tv_class_paths.values():
    rs.shuffle(class_paths)

In [13]:
# Split the shuffled per-class path lists into training and validation sets.
# presumably the split is done per class so both sets keep the class ratio — confirm in data_prep
train_data_paths, val_data_paths = data_prep.get_train_val_split(tv_class_paths, val_split=val_split)

# Datasets

In [14]:
# Batch generator over the training images.
train_datagen = data_prep.InvasionDataGenerator(
    train_data_paths,
    class_labels,
    batch_size,
    # First two entries of resnet_inp_shape, i.e. (128, 128) — presumably the target image size
    resnet_inp_shape[:2],
    rs,
    # presumably True makes the generator derive class weights from its own class counts — confirm in data_prep
    class_weights=True,
    shuffle=True,
    augmentation_function=prep.augment_imgs
)

In [15]:
# Number of training images per class label (shows the class imbalance).
train_datagen.class_counts

{0: 407, 1: 109}

In [16]:
# Per-class weights held by the training generator; reused for the validation generator below.
train_datagen.class_weights

{0: 0.6339066339066339, 1: 2.36697247706422}

In [17]:
# Batch generator over the validation images.
# NOTE(review): validation batches are shuffled and passed through the same
# augmentation function as the training data. If that augmentation is random,
# val_loss (the tuner objective and the early-stopping / checkpoint criterion)
# will be noisy between evaluations — confirm this is intentional.
val_datagen = data_prep.InvasionDataGenerator(
    val_data_paths,
    class_labels,
    batch_size,
    resnet_inp_shape[:2],
    rs,
    # Reuse the weights from the training generator instead of recomputing them here
    class_weights=train_datagen.class_weights,
    shuffle=True,
    augmentation_function=train_datagen.augmentation_function
)

In [18]:
# Number of validation images per class label.
val_datagen.class_counts

{0: 102, 1: 27}

# Build hypermodel and run hyperparameter search

In [19]:
# Hypermodel that Keras Tuner will build and fit once per trial.
# NOTE(review): `last_layer_options` is loaded from the search-space JSON above
# but is not passed here, so the hypermodel presumably falls back to its own
# default choices for the last ResNet layer — confirm whether it should be forwarded.
hypermodel = models.ResNet50TLHyperModel(
    n_outputs=n_outputs,
    img_shape=resnet_inp_shape,
    # Default BinaryCrossentropy (from_logits=False) expects probabilities,
    # which matches the "sigmoid" output activation requested below.
    loss=BinaryCrossentropy(),
    weighted_metrics=[BinaryAccuracy()],
    name=hypermodel_name,
    output_act="sigmoid",
    # Search ranges taken from the hyperparameter search-space JSON
    adam_beta_1_range=adam_beta_1_range,
    adam_beta_2_range=adam_beta_2_range,
    frozen_lr_range=frozen_lr_range,
    fine_tune_lr_range=fine_tune_lr_range,
    frozen_epochs=frozen_epochs,
    fine_tune_epochs=fine_tune_epochs,
    base_model_name="base_model",
    # EarlyStopping callback parameters
    es_criterion=es_criterion,
    es_mode=es_mode,
    es_patience=es_patience,
    es_min_delta=es_min_delta,
    # Frozen ModelCheckpoint callback parameters
    mcp_criterion=mcp_criterion,
    mcp_mode=mcp_mode,
    mcp_best_frozen_weights_path=mcp_best_frozen_weights_file
)

In [20]:
# Bayesian-optimization tuner that minimises validation loss (the same quantity
# used for early stopping and for the frozen-model checkpoint).
# `seed` comes from the training-values JSON; when it is None the search is not repeatable.
# NOTE(review): `overwrite` is left at its default, so re-running with the same
# directory/project_name presumably resumes the stored trials instead of
# starting a fresh search — confirm against the Keras Tuner docs.
tuner = kt.BayesianOptimization(
    hypermodel=hypermodel,
    objective="val_loss",
    num_initial_points=num_initial_points,
    max_trials=max_opt_trials,
    seed=seed,
    # directory="../model_training/",
    directory=model_training_path,
    project_name=project_name
)

In [21]:
# Run the hyperparameter search over the training / validation generators.
# Cannot use external callbacks. Callbacks are defined inside the hypermodel's fit function
# `workers` is not a tuner argument; presumably it is forwarded to the
# hypermodel's fit call to parallelise batch loading — confirm in models.ResNet50TLHyperModel.
tuner.search(
    train_datagen,
    validation_data=val_datagen,
    workers=n_cores
)

Trial 5 Complete [00h 01m 27s]
val_loss: 0.3612251281738281

Best val_loss So Far: 0.31538963317871094
Total elapsed time: 00h 06m 47s
INFO:tensorflow:Oracle triggered exit


In [22]:
# Print the best trials found by the search (the 10 best by default) with their scores.
tuner.results_summary()

Results summary
Results in D:/fogg_lab_tissue_model_analysis/resnet50_invasion_model\invasion_hp_trials
Showing 10 best trials
Objective(name='val_loss', direction='min')
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block2_out
frozen_lr: 0.0005510469719519641
adam_beta_1: 0.8338606444044201
adam_beta_2: 0.9867297612315291
fine_tune_lr: 1e-05
Score: 0.31538963317871094
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block3_out
frozen_lr: 0.01
adam_beta_1: 0.8
adam_beta_2: 0.9990000000000001
fine_tune_lr: 1e-05
Score: 0.3248416781425476
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block3_out
frozen_lr: 0.005704251931830293
adam_beta_1: 0.9308085430088172
adam_beta_2: 0.9846493316136915
fine_tune_lr: 1.82406562825328e-05
Score: 0.33920347690582275
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block3_out
frozen_lr: 0.0001
adam_beta_1: 0.9899999999999999
adam_beta_2: 0.9990000000000001
fine_tune_lr: 1e-05
Score: 0.3612251281738281
Trial summary
Hyperp

In [23]:
# get_best_hyperparameters() returns a list ordered best-first; keep the top entry.
best_hp = tuner.get_best_hyperparameters()[0]

In [24]:
# Show the winning hyperparameter values as a plain dict (this is what gets saved below).
best_hp.values

{'last_resnet_layer': 'conv5_block2_out',
 'frozen_lr': 0.0005510469719519641,
 'adam_beta_1': 0.8338606444044201,
 'adam_beta_2': 0.9867297612315291,
 'fine_tune_lr': 1e-05}

In [25]:
# Persist the winning hyperparameter values as JSON, with keys sorted so the
# file content does not depend on dict ordering.
with open(best_hp_file, "w") as hp_out:
    hp_out.write(json.dumps(best_hp.values, sort_keys=True))