In [1]:
import os
from typing import List, Tuple, Dict, Union, Optional

# Silence TensorFlow's C++ logging; this is set ahead of the `import tensorflow`
# further down in this cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random
# random.seed(2023)
import argparse
from argparse import BooleanOptionalAction
# Run-level switches.
# use_fp16 is forwarded to gpu_setup(fp16=...) and recorded as a Neptune experiment parameter.
use_fp16 = False
# Experiment-tracking backend. The flags are checked in an if/elif chain
# (wandb, then neptune, then tensorboard), so only the first enabled one is used.
use_wandb = False
use_neptune = False
use_tensorboard = True
import numpy as np

# np.random.seed(2023)  # Set seed for reproducibility
import keras
import tensorflow as tf
from tqdm import tqdm

# tf.random.set_seed(2023)
# Run tf.function-wrapped code eagerly instead of as compiled graphs.
# NOTE(review): eager execution is usually slower and may use more memory than
# graph mode — confirm it is still required for this search.
tf.config.run_functions_eagerly(True)
# Reset Keras' global state so the notebook starts from a clean session.
keras.backend.clear_session()
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, \
    TensorBoard
from keras.optimizers import Adam
import keras_tuner as kt

from archs.segmentation.unet import build_unet
from data.data_generator import loaders
from utils.helper import create_dirs, write_setup, gpu_setup
from utils.loss import FocalDiceLoss, dice_coef, dice_coef_loss
from utils.visualizations import plot_sample
# from archs.hpo import hyperparameter_build
# Configure the GPU through the project helper.
# NOTE(review): `fp16` presumably toggles mixed precision — confirm in utils.helper.
gpu_setup(fp16=use_fp16)

# Set up directories and variables
# Names of the arrays handed to the data loaders. The model input uses
# len(array_labels) - 1 channels, so one entry (presumably 'mask') is the target.
array_labels = ['t1', 't1ce', 't2', 'flair', 'mask']
name = "tumor-segmentation-keras-tuner"   # project name for the tuner and the loggers
gen_dir = "/home/thjo/Datasets/Brats/"    # dataset root passed to loaders()
model_dir = "./search/"                   # tuner output directory
# exist_ok=True makes directory creation idempotent and avoids the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(model_dir, exist_ok=True)
max_iter = 40      # used as the number of epochs per trial in tuner.search()
batch_size = 256   # passed to loaders()

# Initialise exactly one experiment-tracking backend (first enabled flag wins).
# The optional packages are imported lazily so they are only required when used.
if use_wandb:
    print(f"Initalizing wandb for project {name}")
    import wandb
    from wandb.keras import WandbCallback
    wandb.init(project=name)
elif use_neptune:
    print(f"Initalizing neptune for project {name}")
    import neptune.legacy as neptune
    import neptunecontrib.monitoring.kerastuner as npt_utils
    neptune.init(project_qualified_name=name)
    # NOTE(review): two experiments are created back to back — confirm both are intended.
    neptune.create_experiment(name=name, params={"fp16": use_fp16})
    neptune.create_experiment('bayesian-sweep')
elif use_tensorboard:
    # TensorBoard event files go under <model_dir>/<name>/logs.
    log_dir = os.path.join(model_dir, name, "logs")
    os.makedirs(log_dir, exist_ok=True)

GPU(s) available (using '/physical_device:GPU:0'). Training will be lightning fast!


In [2]:

# Search-space constants and fixed model settings used by hyperparameter_build.

# --- Fixed (not searched) U-Net settings ---
# 64x64 inputs with one channel per entry of array_labels except one.
input_shape = (64, 64, len(array_labels) - 1)
num_classes = 1
kernel_size = 3
strides = 1
padding = "same"
output_activation = "sigmoid"

# --- Candidate values / bounds for the searched hyperparameters ---
max_depth = 6       # upper bound of the 'depth' hyperparameter
filter_step = 16    # step of each per-level 'filter_{i}' hyperparameter
activations = ["relu", "gelu", "selu"]
decoder_types = ["concat", "add"]
upsample_types = ["transposed", "bilinear"]

# Loss name -> loss class/function; the names are what the tuner samples.
losses = {
    "focal": FocalDiceLoss,
    "dice": dice_coef_loss,
}
losses_keys = list(losses)

# Module-level default; hyperparameter_build uses its own local `depth`.
depth = 5
def hyperparameter_build(kt:kt.HyperParameters):
    """Sample a U-Net configuration and return the compiled model.

    Registers the search space on the given hyperparameter container (network
    depth, per-level filters / block depths / dropout rates, activation,
    output depth, upsampling type, loss and optimizer settings), builds the
    network with ``build_unet`` and compiles it with Adam and the
    ``dice_coef`` metric.

    Args:
        kt: The ``HyperParameters`` object supplied by the tuner. Inside this
            function the name shadows the ``keras_tuner`` module alias.

    Returns:
        The compiled model returned by ``build_unet``.
    """
    # Local alias so the body does not read like calls into the keras_tuner module.
    hp = kt

    # Number of levels; every per-level hyperparameter below is registered
    # once per level, in the same order as the lists are built.
    n_levels = hp.Int('depth', min_value=4, max_value=max_depth, step=1)

    filters = []
    for level in range(n_levels):
        filters.append(
            hp.Int(
                f'filter_{level}',
                min_value=2**(level+3),
                max_value=2**(level+5),
                step=filter_step,
            )
        )

    activation = hp.Choice('activation', values=activations)

    # Blocks per level: between 1 and (level index + 1).
    depth_encoder = []
    for level in range(n_levels):
        depth_encoder.append(
            hp.Int(f'depth_encoder_{level}', min_value=1, max_value=level+1, step=1)
        )
    depth_decoder = []
    for level in range(n_levels):
        depth_decoder.append(
            hp.Int(f'depth_decoder_{level}', min_value=1, max_value=level+1, step=1)
        )

    # Encoder and decoder dropout rates share the same range, step and default.
    drop_space = dict(min_value=0.0, max_value=0.3, step=0.1, default=0.05)
    drop_rate_encoder = []
    for level in range(n_levels):
        drop_rate_encoder.append(hp.Float(f'drop_rate_encoder_{level}', **drop_space))
    drop_rate_decoder = []
    for level in range(n_levels):
        drop_rate_decoder.append(hp.Float(f'drop_rate_decoder_{level}', **drop_space))

    output_depth = hp.Int('output_depth', min_value=1, max_value=6, step=1)
    # The decoder type is pinned to "concat" below; `decoder_types` is not searched.
    upsample_type = hp.Choice('upsample_type', values=upsample_types)

    model = build_unet(
        input_shape=input_shape,
        num_classes=num_classes,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        activation=activation,
        depth_encoder=depth_encoder,
        depth_decoder=depth_decoder,
        drop_rate_encoder=drop_rate_encoder,
        drop_rate_decoder=drop_rate_decoder,
        output_depth=output_depth,
        output_activation=output_activation,
        decoder_type="concat",
        upsample_type=upsample_type,
    )

    loss_key = hp.Choice('loss', values=losses_keys)
    loss = losses[loss_key]
    if loss == FocalDiceLoss:
        # Extra hyperparameters are only registered when the focal loss is
        # chosen; the two weights are tied so that they sum to 1.
        gamma = hp.Float('gamma', min_value=0.0, max_value=5.0, step=0.25)
        w_focal = hp.Float('w_focal', min_value=0.0, max_value=1, step=0.05)
        loss = loss(w_focal=w_focal, w_dice=1 - w_focal, gamma=gamma)

    learning_rate = hp.Float('learning_rate', min_value=5e-4, max_value=5e-2, default=1e-2, step=1e-3)
    weight_decay = hp.Float('weight_decay', min_value=1e-6, max_value=5e-2, default=1e-3, step=1e-3)
    # NOTE(review): the value is passed as Adam's `decay` argument, which in
    # legacy Keras optimizers is a learning-rate decay rather than a weight
    # decay — confirm this is the intended meaning.
    model.compile(
        optimizer=Adam(learning_rate=learning_rate, decay=weight_decay),
        loss=loss,
        metrics=[dice_coef],
    )
    return model

In [3]:

# Tuner-level logger. The tuner's `logger` argument takes a KerasTuner logger
# object, which is a different interface from a Keras training callback.
if use_wandb:
    # WandbCallback is a Keras training callback, not a tuner logger, so it
    # must not be passed as `logger`; it is attached to the fit callbacks below.
    logger = None
elif use_neptune:
    logger = npt_utils.NeptuneLogger()
else:
    logger = None

# Objective: maximise the validation value of the `dice_coef` metric that
# hyperparameter_build compiles into every model.
monitor = "val_dice_coef"
direction = "max"
tuner = kt.BayesianOptimization(
    hyperparameter_build,
    objective=kt.Objective(monitor, direction=direction),
    max_trials=60,
    directory=model_dir,
    project_name=name,
    overwrite=True,   # discard results of any previous search with the same project name
    seed=2023,
    logger=logger,
)

# Per-trial training callbacks: one tracking callback (matching the backend
# selected above), followed by early stopping and LR reduction on the objective.
callbacks = []
if use_wandb:
    callbacks.append(WandbCallback())
elif use_neptune:
    callbacks.append(npt_utils.NeptuneCallback(log_models=True))
elif use_tensorboard:
    callbacks.append(TensorBoard(log_dir=log_dir, histogram_freq=1))
# Stop a trial after 5 epochs without improvement and restore its best weights.
early_stopping = EarlyStopping(monitor=monitor, patience=5, verbose=1, mode=direction, restore_best_weights=True)
# Halve the learning rate after 2 epochs without improvement.
reduce_lr = ReduceLROnPlateau(monitor=monitor, factor=0.5, patience=2, verbose=1, mode=direction)
callbacks.extend([early_stopping, reduce_lr])

In [4]:

# Build the train / validation / test generators with augmentation enabled.
# The fourth value returned by loaders() is not used in this notebook.
gen_train, gen_val, gen_test, _ = loaders(
    gen_dir=gen_dir,
    batch_size=batch_size,
    augment=True,
    array_labels=array_labels,
)

In [5]:
# Run the hyperparameter search: each trial trains for up to `max_iter` epochs
# and is scored on the validation generator.
# NOTE(review): the output recorded below shows the search aborting after three
# consecutive failed trials caused by a GPU out-of-memory error.
tuner.search(
    gen_train,
    epochs=max_iter,
    validation_data=gen_val,
    callbacks=callbacks,
)
# Print a summary of the trials.
tuner.results_summary()

Trial 28 Complete [00h 00m 12s]

Best val_dice_coef So Far: 0.8717613816261292
Total elapsed time: 01h 23m 04s

Search: Running Trial #29

Value             |Best Value So Far |Hyperparameter
5                 |5                 |depth
24                |24                |filter_0
32                |16                |filter_1
64                |96                |filter_2
80                |80                |filter_3
gelu              |relu              |activation
1                 |1                 |depth_encoder_0
1                 |2                 |depth_encoder_1
2                 |1                 |depth_encoder_2
4                 |3                 |depth_encoder_3
1                 |1                 |depth_decoder_0
2                 |2                 |depth_decoder_1
1                 |3                 |depth_decoder_2
1                 |2                 |depth_decoder_3
0                 |0                 |drop_rate_encoder_0
0.2               |0.2               

Traceback (most recent call last):
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/tuner.py", line 214, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/hypermodel.py", line 144, in fit
    return model.fit(*args, **kwargs)
  File "/home/thjo/m

RuntimeError: Number of consecutive failures excceeded the limit of 3.
Traceback (most recent call last):
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/base_tuner.py", line 270, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/base_tuner.py", line 235, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/tuner.py", line 287, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/tuner.py", line 214, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras_tuner/engine/hypermodel.py", line 144, in fit
    return model.fit(*args, **kwargs)
  File "/home/thjo/miniconda3/envs/tf2/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/home/thjo/Code/School/AppliedDL-project/archs/segmentation/model_utils.py", line 73, in call
    y = self.activation(self.seq(x)+self.skip(residual))
tensorflow.python.framework.errors_impl.ResourceExhaustedError: Exception encountered when calling layer 'batch_normalization_44' (type BatchNormalization).

{{function_node __wrapped__FusedBatchNormV3_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[256,24,32,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:FusedBatchNormV3]

Call arguments received by layer 'batch_normalization_44' (type BatchNormalization):
  • inputs=tf.Tensor(shape=(256, 32, 32, 24), dtype=float32)
  • training=True
  • mask=None
