In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import keras_tuner as kt

from tensorflow import data
from tensorflow.keras import Sequential, Input, Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, Softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.applications import resnet50
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
from fl_tissue_model_tools import data_prep, models

In [3]:
# --- Data location / reproducibility ---
# NOTE(review): absolute, machine-specific path; change it to match the local data layout.
root_data_path = "D:/malaria_data/cell_images_split"
seed = 2049

# --- Model input / output ---
# Image shape fed to the network; the first two entries are used as the generators' target_size.
# Multiples of 32 work best with ResNet
# NOTE(review): the original note here read 'Resize using "area" method', but no
# interpolation argument is passed to any generator in this notebook — confirm
# where (or whether) that resize is applied.
resnet_inp_shape = (128, 128, 3)
n_outputs = 1

# --- Data split / batching ---
val_split = 0.2
batch_size = 32

# --- Training schedule and hyperparameter search ---
frozen_epochs = 10
fine_tune_epochs = 10
min_fine_tune_lr = 1e-5
last_layer_options = ["conv5_block3_out", "conv5_block2_out", "conv5_block1_out", "conv4_block6_out"]
max_opt_trials = 11
hyperparam_path = "../malaria_v2_output/malaria_v2_hp_trials"

# Early stopping
es_criterion = "val_loss"
es_mode = "min"
es_patience = 1
es_min_delta = 0.1

# Data

## Data split

In [4]:
# Generator configuration for the training directory: ResNet50 preprocessing,
# a held-out validation fraction, and random geometric augmentation.
train_datagen = ImageDataGenerator(
    # Preprocess inputs exactly as the pretrained ResNet50 expects
    preprocessing_function=resnet50.preprocess_input,
    # Fraction of the images reserved for the validation subset
    validation_split=val_split,
    # Augmentation: random rotations over a 360-degree range plus flips on both axes
    rotation_range=360,
    vertical_flip=True,
    horizontal_flip=True
)

In [5]:
# Test-time configuration: ResNet50 preprocessing only (no augmentation, no validation split).
test_datagen = ImageDataGenerator(preprocessing_function=resnet50.preprocess_input)

## Generators

In [6]:
# Validation images must not be randomly flipped/rotated: augmentation makes
# val_loss noisy, and val_loss drives both early stopping and the tuner objective.
# A generator with the same validation_split but no augmentation is used for the
# validation subset; the split is taken from the file listing, so the same
# fraction of the directory is selected.
val_datagen = ImageDataGenerator(
    preprocessing_function=resnet50.preprocess_input,
    validation_split=val_split
)

train_generator = train_datagen.flow_from_directory(
    f"{root_data_path}/train",
    target_size=resnet_inp_shape[:2],
    batch_size=batch_size,
    # `classes` is an ordered list of subdirectory names; the list position is
    # the label index, so uninfected -> 0 and parasitized -> 1.
    classes=["uninfected", "parasitized"],
    class_mode="binary",
    seed=seed,
    subset="training"
)

val_generator = val_datagen.flow_from_directory(
    f"{root_data_path}/train",
    target_size=resnet_inp_shape[:2],
    batch_size=batch_size,
    # Same class order as the training generator (uninfected -> 0, parasitized -> 1)
    classes=["uninfected", "parasitized"],
    class_mode="binary",
    seed=seed,
    subset="validation"
)

Found 17638 images belonging to 2 classes.
Found 4408 images belonging to 2 classes.


In [7]:
# Test images are served without labels (class_mode=None) and in a fixed order
# (shuffle=False), so outputs computed from this generator line up with its
# file listing.
test_generator = test_datagen.flow_from_directory(
    f"{root_data_path}/test",
    target_size=resnet_inp_shape[:2],
    batch_size=batch_size,
    # `classes` is an ordered list of subdirectory names; the list position is
    # the label index, so uninfected -> 0 and parasitized -> 1.
    classes=["uninfected", "parasitized"],
    class_mode=None,
    shuffle=False
)

Found 5512 images belonging to 2 classes.


# Build hyper model

In [None]:
# Transfer-learning hypermodel from fl_tissue_model_tools, built around ResNet50.
# NOTE(review): the meaning of each argument is defined by
# models.ResNet50TLHyperModel, which is not visible in this notebook — confirm
# against that class before changing any of them.
hypermodel = models.ResNet50TLHyperModel(
    # Identification
    name=hyperparam_path,
    base_model_name="base_model",
    # Network input / output
    img_shape=resnet_inp_shape,
    n_outputs=n_outputs,
    output_act="sigmoid",
    # The frozen-phase optimizer is passed as an instance while the fine-tune
    # optimizer is passed as the class itself — presumably so it can be
    # instantiated later with a tuned learning rate; TODO confirm.
    frozen_optimizer=Adam(),
    fine_tune_optimizer=Adam,
    # Binary classification loss and metric
    loss=BinaryCrossentropy(),
    metrics=[BinaryAccuracy()],
    # Training schedule
    frozen_epochs=frozen_epochs,
    fine_tune_epochs=fine_tune_epochs,
    min_fine_tune_lr=min_fine_tune_lr
)

In [None]:
# Bayesian-optimization tuner over the hypermodel, scored on validation loss.
# Trial results are stored under `directory`/`project_name`.
tuner = kt.BayesianOptimization(
    directory=".",
    project_name=hyperparam_path,
    hypermodel=hypermodel,
    objective="val_loss",
    # Upper bound on the number of hyperparameter configurations to try
    max_trials=max_opt_trials,
    seed=seed
)

In [None]:
# Stop a trial early once the monitored quantity fails to improve by at least
# es_min_delta for es_patience consecutive epochs.
es_callback = EarlyStopping(
    monitor=es_criterion,
    mode=es_mode,
    min_delta=es_min_delta,
    patience=es_patience
)

In [None]:
# Run the hyperparameter search; arguments are forwarded to the hypermodel's fit.
# len(generator) is the number of batches in one pass over the data, including
# the final partial batch. Using `n // batch_size` instead would floor away that
# last batch, so the remainder images would be skipped every epoch.
tuner.search(
    train_generator,
    validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
    callbacks=[es_callback]
)

In [None]:
# Print a summary of the search results.
tuner.results_summary()

In [None]:
# Take the single top-ranked hyperparameter set from the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Display the chosen hyperparameter values.
best_hp.values

In [None]:
# Retrieve the single best model found by the tuner.
best_tl_model = tuner.get_best_models(num_models=1)[0]

In [None]:
# Print the layer-by-layer architecture of the best model.
best_tl_model.summary()