In [None]:
import numpy as np
import keras_tuner as kt
import json
import multiprocessing
import os
import subprocess

#CONDA_PREFIX = os.environ['CONDA_PREFIX']
#CUDA_PATH='/usr/local/apps/cuda/11.2'
#os.environ["LD_LIBRARY_PATH"] = f'{CONDA_PREFIX}/lib/'
#os.environ['CUDA_PATH']=CUDA_PATH
#os.environ['XLA_FLAGS']=f'--xla_gpu_cuda_data_dir={CUDA_PATH}'

from glob import glob
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy

In [5]:
# Diagnostic only: runs `module load cuda/11.2` followed by `module list` in a child shell.
# NOTE(review): environment changes made inside a child shell presumably do not propagate
# back to this kernel, so this cannot make CUDA libraries visible to TensorFlow here —
# confirm, and load the module before starting Jupyter instead.
# The recorded return value is b''; presumably the module listing is written to stderr — verify.
subprocess.check_output('module load cuda/11.2;module list', shell=True)


Currently Loaded Modules:
  1) slurm/22.05   2) cuda/11.2

 



b''

In [4]:
# Diagnostic only: lists the environment modules visible to a fresh child shell.
subprocess.check_output('module list', shell=True)


Currently Loaded Modules:
  1) slurm/22.05

 



b''

In [6]:
from fl_tissue_model_tools import data_prep, dev_config, models, helper
import fl_tissue_model_tools.preprocessing as prep

In [7]:
import tensorflow as tf

In [8]:
# Show the GPU devices TensorFlow can see; the recorded run printed an empty list.
print(tf.config.list_physical_devices('GPU'))

[]


2022-12-23 15:13:55.859455: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/python3.10/site-packages/cv2/../../../../lib:/apps/slurm/22.05/lib:/nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/
2022-12-23 15:13:55.860559: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/python3.10/site-packages/cv2/../../../../lib:/apps/slurm/22.05/lib:/nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/
2022-12-23 15:13:55.860644: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.

In [3]:
# Number of CPU cores this process is actually allowed to use.
# multiprocessing.cpu_count() reports every core on the machine, which over-counts when the
# process is restricted to a subset (e.g. by a batch scheduler or cgroup); the CPU affinity
# mask reflects the real limit. os.sched_getaffinity is not available on every platform,
# hence the fallback to the total core count.
try:
    n_cores = len(os.sched_getaffinity(0))
except AttributeError:
    n_cores = multiprocessing.cpu_count()
n_cores

96

In [4]:
# Project directory configuration; later cells read `dirs.data_dir` and `dirs.analysis_dir`.
dirs = dev_config.get_dev_directories()

# Set up model training parameters

In [5]:
# Load the fixed (non-searched) training settings from the shared JSON file.
with open("../model_training/invasion_depth_training_values.json", 'r') as values_file:
    training_values = json.load(values_file)

# The file may encode "no seed" as the string "None"; turn that into a real None.
if training_values["rs_seed"] == "None":
    training_values["rs_seed"] = None

In [6]:
# Display the loaded training settings for the record.
training_values

{'batch_size': 32,
 'frozen_epochs': 50,
 'fine_tune_epochs': 50,
 'val_split': 0.2,
 'early_stopping_patience': 25,
 'early_stopping_min_delta': 0.0001,
 'rs_seed': None,
 'resnet_inp_shape': [256, 256, 3],
 'class_labels': {'no_invasion': 0, 'invasion': 1},
 'cls_thresh': 0.5,
 'n_models': 5}

In [7]:
# Load the hyperparameter search space (ranges, layer options, trial counts).
with open("../model_training/invasion_depth_hp_space.json", 'r') as space_file:
    hp_search_space = json.load(space_file)

In [8]:
# Display the loaded search space for the record.
hp_search_space

{'adam_beta_1_range': [0.8, 0.99],
 'adam_beta_2_range': [0.98, 0.999],
 'frozen_lr_range': [0.0001, 0.01],
 'fine_tune_lr_range': [1e-05, 0.001],
 'last_layer_options': ['conv5_block3_out',
  'conv5_block2_out',
  'conv5_block1_out',
  'conv4_block6_out'],
 'num_initial_points': 25,
 'max_opt_trials': 50}

In [9]:
### Data paths ###
project_name = "invasion_hp_trials"
hypermodel_name = "invasion_depth_hypermodel"
root_data_path = f"{dirs.data_dir}/invasion_data/"
model_training_path = f"{dirs.analysis_dir}/resnet50_invasion_model"
# Search artifacts are kept in two sub-directories of the model training directory.
hp_search_hp_path = f"{model_training_path}/hyperparameter_search_hps"
hp_search_weights_path = f"{model_training_path}/hyperparameter_search_weights"
best_hp_file = f"{hp_search_hp_path}/best_hyperparams_v1.json"
mcp_best_frozen_weights_file = f"{hp_search_weights_path}/best_frozen_weights.h5"


### General training parameters ###
resnet_inp_shape = tuple(training_values["resnet_inp_shape"])
class_labels = training_values["class_labels"]
n_outputs = 1  # binary classification -> a single output unit is enough

seed, val_split, batch_size = (
    training_values[key] for key in ("rs_seed", "val_split", "batch_size")
)
frozen_epochs, fine_tune_epochs = (
    training_values[key] for key in ("frozen_epochs", "fine_tune_epochs")
)
# For a quick smoke test, temporarily override the epoch counts, e.g.:
# frozen_epochs = 5
# fine_tune_epochs = 5


### Early stopping ###
es_criterion, es_mode = "val_loss", "min"
# Patience / min-delta come from the training values file; update them there
# depending on seriousness of experiment.
es_patience = training_values["early_stopping_patience"]
es_min_delta = training_values["early_stopping_min_delta"]


### Frozen model saving (for transitioning from frozen model to fine-tuned model) ###
mcp_criterion, mcp_mode = "val_loss", "min"


### Hyperparameter search ###
# Each range is stored as a two-element JSON list; convert to tuples.
adam_beta_1_range, adam_beta_2_range, frozen_lr_range, fine_tune_lr_range = (
    tuple(hp_search_space[key])
    for key in (
        "adam_beta_1_range",
        "adam_beta_2_range",
        "frozen_lr_range",
        "fine_tune_lr_range",
    )
)
last_layer_options = hp_search_space["last_layer_options"]
num_initial_points = hp_search_space["num_initial_points"]
max_opt_trials = hp_search_space["max_opt_trials"]
# For a quick smoke test, temporarily override the trial counts, e.g.:
# num_initial_points = 3
# max_opt_trials = 5

In [10]:
# Make sure both search output directories exist before anything is written to them.
for out_dir in (hp_search_hp_path, hp_search_weights_path):
    os.makedirs(out_dir, exist_ok=True)

# Prep for loading data

In [11]:
# Shared random state used for path shuffling and by both data generators.
# `seed` comes from the training values; it is None in the recorded run, so that run is not seeded.
rs = np.random.RandomState(seed)

In [12]:
# Training & validation data (drawn from same image set & randomly assigned)
tv_class_paths = {v: glob(f"{root_data_path}/train/{k}/*.tif") for k, v in class_labels.items()}
for k, v in tv_class_paths.items():
    rs.shuffle(v)

In [13]:
# Split the shuffled paths into training and validation sets.
# `val_split` comes from the training values (0.2 in the recorded run).
train_data_paths, val_data_paths = data_prep.get_train_val_split(tv_class_paths, val_split=val_split)

# Datasets

In [14]:
# Training batch generator.
# Positional arguments: training paths, class label mapping, batch size, the first two
# entries of the ResNet input shape, and the shared random state.
# class_weights=True presumably asks the generator to compute weights from the class
# counts (see `train_datagen.class_weights` below) — confirm in data_prep.
train_datagen = data_prep.InvasionDataGenerator(
    train_data_paths,
    class_labels,
    batch_size,
    resnet_inp_shape[:2],
    rs,
    class_weights=True,
    shuffle=True,
    augmentation_function=prep.augment_invasion_imgs
)

In [15]:
# Per-class image counts in the training set (the recorded run is heavily imbalanced).
train_datagen.class_counts

{0: 678, 1: 119}

In [16]:
# Class weights held by the training generator; these are reused for the validation generator.
train_datagen.class_weights

{0: 0.5877581120943953, 1: 3.3487394957983194}

In [17]:
# Validation batch generator; reuses the training generator's class weights so the
# weighted loss/metrics are on the same scale for both sets.
# NOTE(review): validation batches are shuffled and passed through the same augmentation
# function as training. If that function applies random transforms, `val_loss` (the tuner
# objective and early-stopping criterion) becomes noisy — confirm this is intended, or
# disable augmentation/shuffling for validation if the generator supports it.
val_datagen = data_prep.InvasionDataGenerator(
    val_data_paths,
    class_labels,
    batch_size,
    resnet_inp_shape[:2],
    rs,
    class_weights=train_datagen.class_weights,
    shuffle=True,
    augmentation_function=train_datagen.augmentation_function
)

In [18]:
# Per-class image counts in the validation set.
val_datagen.class_counts

{0: 170, 1: 30}

# Build hyper model

In [19]:
# Keyword arguments are grouped by concern, then merged into one constructor call.
# NOTE(review): `last_layer_options` from the configuration cell is not passed here;
# confirm the hypermodel's own default layer options are the intended ones.

# Hyperparameter ranges explored by the tuner
hp_range_kwargs = dict(
    adam_beta_1_range=adam_beta_1_range,
    adam_beta_2_range=adam_beta_2_range,
    frozen_lr_range=frozen_lr_range,
    fine_tune_lr_range=fine_tune_lr_range,
)
# Epoch budget for the frozen and fine-tuning phases
epoch_kwargs = dict(
    frozen_epochs=frozen_epochs,
    fine_tune_epochs=fine_tune_epochs,
)
# EarlyStopping callback parameters
early_stopping_kwargs = dict(
    es_criterion=es_criterion,
    es_mode=es_mode,
    es_patience=es_patience,
    es_min_delta=es_min_delta,
)
# Frozen ModelCheckpoint callback parameters
checkpoint_kwargs = dict(
    mcp_criterion=mcp_criterion,
    mcp_mode=mcp_mode,
    mcp_best_frozen_weights_path=mcp_best_frozen_weights_file,
)

hypermodel = models.ResNet50TLHyperModel(
    n_outputs=n_outputs,
    img_shape=resnet_inp_shape,
    loss=BinaryCrossentropy(),
    weighted_metrics=[BinaryAccuracy()],
    name=hypermodel_name,
    output_act="sigmoid",
    base_model_name="base_model",
    **hp_range_kwargs,
    **epoch_kwargs,
    **early_stopping_kwargs,
    **checkpoint_kwargs,
)

2022-12-23 14:46:02.225299: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/python3.10/site-packages/cv2/../../../../lib:/apps/slurm/22.05/lib
2022-12-23 14:46:02.226116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /nfs/hpc/share/wigginno/.conda/envs/tissue-model-analysis/lib/python3.10/site-packages/cv2/../../../../lib:/apps/slurm/22.05/lib
2022-12-23 14:46:02.226385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such

In [20]:
# Re-check GPU visibility right before the (long) search. TensorFlow is already imported
# in an earlier cell, so it is not re-imported here; an empty list means the search
# will not run on a GPU.
print(tf.config.list_physical_devices('GPU'))

[]


In [21]:
# Bayesian optimisation over the hypermodel's search space with validation loss as the objective.
# Trial results are written under <model_training_path>/<project_name>.
tuner = kt.BayesianOptimization(
    hypermodel=hypermodel,
    objective="val_loss",
    max_trials=max_opt_trials,
    num_initial_points=num_initial_points,
    seed=seed,
    directory=model_training_path,
    project_name=project_name,
)

In [22]:
# Run the hyperparameter search on the training/validation generators.
# Cannot use external callbacks. Callbacks are defined inside the hypermodel's fit function
# NOTE(review): `workers=36` is hard-coded and not derived from `n_cores` computed earlier —
# consider tying it to the cores actually available to the job.
# The recorded run took 06h 44m for 50 trials, so re-running this cell is expensive.
tuner.search(
    train_datagen,
    validation_data=val_datagen,
    workers=36
)

Trial 50 Complete [00h 08m 23s]
val_loss: 0.21549305319786072

Best val_loss So Far: 0.19290809333324432
Total elapsed time: 06h 44m 13s
INFO:tensorflow:Oracle triggered exit


In [23]:
# Print the best trials with their hyperparameters and scores.
tuner.results_summary()

Results summary
Results in /nfs/stak/users/cookcar/hpc-share/fogg_lab_analysis/resnet50_invasion_model/invasion_hp_trials
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x2b2ff8cd2970>
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block1_out
frozen_lr: 0.00023296415902195254
adam_beta_1: 0.8690806056354333
adam_beta_2: 0.9817045929249536
fine_tune_lr: 4.837200625807212e-05
Score: 0.19290809333324432
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block2_out
frozen_lr: 0.00695461878420833
adam_beta_1: 0.9164616012708694
adam_beta_2: 0.992845865126449
fine_tune_lr: 1.0869955378279433e-05
Score: 0.19577443599700928
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block3_out
frozen_lr: 0.00037464381329162526
adam_beta_1: 0.8352069226681756
adam_beta_2: 0.981236038221177
fine_tune_lr: 1.03699753028074e-05
Score: 0.19703063368797302
Trial summary
Hyperparameters:
last_resnet_layer: conv5_block3_out
frozen_lr: 0.009801011964790135
adam_beta_1

In [24]:
# Take the first entry of the tuner's best-hyperparameters list (the top-ranked trial).
best_hp = tuner.get_best_hyperparameters()[0]

In [25]:
# Display the winning hyperparameter values as a plain dict.
best_hp.values

{'last_resnet_layer': 'conv5_block1_out',
 'frozen_lr': 0.00023296415902195254,
 'adam_beta_1': 0.8690806056354333,
 'adam_beta_2': 0.9817045929249536,
 'fine_tune_lr': 4.837200625807212e-05}

In [26]:
# Persist the winning hyperparameter values as JSON with sorted keys.
with open(best_hp_file, "w") as out_file:
    out_file.write(json.dumps(best_hp.values, sort_keys=True))