# Hyperparameter optimization for Berlin Trees dataset

- Author: Daniel Lusk
- Date: 2022-09-27

## Imports and functions

In [None]:
import glob

import numpy as np
import tifffile as tiff
from patchify import patchify
from sklearn.model_selection import KFold, train_test_split
from tensorflow.keras.layers import (
    Activation,
    BatchNormalization,
    Concatenate,
    Conv2D,
    Conv2DTranspose,
    Input,
    MaxPool2D,
)
from tensorflow.keras.metrics import MeanIoU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm


def _read_hwc(path):
    """Read a TIFF and guarantee a trailing channel axis: (H, W) -> (H, W, 1)."""
    img = tiff.imread(path)
    return img[..., np.newaxis] if img.ndim == 2 else img


def patch_train_label(raster, labels, img_size, channels=False, merge_channel=False):
    """Cut rasters and their label masks into square, non-overlapping patches.

    Args:
        raster: Sequence of paths to the input raster TIFFs.
        labels: Sequence of paths to the label TIFFs, in the same order as
            ``raster``.
        img_size: Edge length (pixels) of the square patches.
        channels: Number of leading channels to keep from each raster. A falsy
            value (the default) keeps every channel of the first raster.
        merge_channel: Optional sequence of paths to extra TIFFs (e.g. NIR),
            in the same order as ``raster``, whose channels are appended to
            the (possibly truncated) raster channels. Falsy disables merging.

    Returns:
        Tuple ``(data_train, data_label)``: ``data_train`` is a float array of
        shape ``(n_patches, img_size, img_size, n_channels)`` divided by 255;
        ``data_label`` is an int array of shape
        ``(n_patches, img_size, img_size, 1)`` holding 1 where the label
        raster is > 0 and 0 elsewhere.

    Raises:
        ValueError: If ``raster`` is empty or the path lists differ in length.
    """
    n = len(raster)
    if n == 0:
        raise ValueError("No raster files were given; check the input directory.")
    if len(labels) != n:
        raise ValueError(f"Got {n} rasters but {len(labels)} label files.")
    if merge_channel and len(merge_channel) != n:
        raise ValueError(
            f"Got {n} rasters but {len(merge_channel)} merge-channel files."
        )

    # All images are assumed to share the first raster's dimensions.
    sample = _read_hwc(raster[0])
    # Patches per image; rows and columns are counted separately so that
    # non-square rasters work as well.
    m = (sample.shape[0] // img_size) * (sample.shape[1] // img_size)

    # Channels taken from the raster itself (before any merge)
    base_channels = channels if channels else sample.shape[-1]
    channels = base_channels
    if merge_channel:
        channels += _read_hwc(merge_channel[0]).shape[-1]

    data_train = np.zeros((n * m, img_size, img_size, channels))
    data_label = np.zeros((n * m, img_size, img_size))

    for k, (raster_path, label_path) in enumerate(zip(raster, labels)):
        # Only read in the specified number of channels from the input raster,
        # so that appended bands are never pushed out of the patch window.
        r = _read_hwc(raster_path)[..., :base_channels]
        if merge_channel:
            r = np.concatenate((r, _read_hwc(merge_channel[k])), axis=-1)

        patches_train = patchify(
            r,
            (img_size, img_size, channels),
            step=img_size,
        )
        patches_label = patchify(
            tiff.imread(label_path), (img_size, img_size), step=img_size
        )
        data_train[k * m : (k + 1) * m] = patches_train.reshape(
            -1, img_size, img_size, channels
        )
        data_label[k * m : (k + 1) * m] = patches_label.reshape(
            -1, img_size, img_size
        )

    # Binarize the labels and add a trailing channel axis
    data_label = (data_label > 0).astype("int")
    data_label = np.expand_dims(data_label, axis=-1)
    # Already float64, so scale in place instead of copying the whole array.
    # NOTE(review): dividing by 255 presumes 8-bit imagery — confirm.
    data_train /= 255

    print(
        f"\nData sizes:\ndata_train: {data_train.shape}\ndata_label: {data_label.shape}\n"
    )

    return data_train, data_label


# Construct the U-Net
def conv_block(input, num_filters):
    """Apply two successive Conv2D(3x3, same) -> BatchNorm -> ReLU stages.

    Args:
        input: Tensor to convolve.
        num_filters: Number of filters used by both convolutions.

    Returns:
        The output tensor of the second ReLU activation.
    """
    features = input
    for _ in range(2):
        features = Conv2D(num_filters, 3, padding="same")(features)
        features = BatchNormalization()(features)
        features = Activation("relu")(features)
    return features


def encoder_block(input, num_filters):
    """Run a conv block and 2x2 max-pool its output.

    Returns:
        Tuple ``(skip, pooled)``: the conv-block output (used later as the
        skip connection) and its max-pooled version.
    """
    skip = conv_block(input, num_filters)
    pooled = MaxPool2D((2, 2))(skip)
    return skip, pooled


def decoder_block(input, skip_features, num_filters):
    """Upsample with a stride-2 transposed conv, merge the skip, then convolve.

    Args:
        input: Tensor coming from the previous (deeper) stage.
        skip_features: Encoder tensor concatenated after the upsampled input.
        num_filters: Filters for the transposed conv and the conv block.

    Returns:
        The output tensor of the conv block.
    """
    upsampled = Conv2DTranspose(num_filters, (2, 2), strides=2, padding="same")(input)
    merged = Concatenate()([upsampled, skip_features])
    return conv_block(merged, num_filters)


def build_unet(input_shape):
    """Assemble a four-level U-Net with a single-channel sigmoid output.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.

    Returns:
        An uncompiled Keras ``Model`` named "U-Net".
    """
    inputs = Input(input_shape)

    # Encoder: keep each level's pre-pooling features for the skip connections
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        skip, x = encoder_block(x, filters)
        skips.append(skip)

    # Bottleneck
    x = conv_block(x, 1024)

    # Decoder: consume the skips from deepest to shallowest
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = decoder_block(x, skip, filters)

    outputs = Conv2D(1, 1, padding="same", activation="sigmoid")(x)
    return Model(inputs, outputs, name="U-Net")


def train_unet(x_train, y_train, x_test, y_test, batch_size, epochs, eta):
    """Build a fresh U-Net, compile it with Adam, and fit it.

    Args:
        x_train: Training inputs; the model input shape is ``x_train.shape[1:]``.
        y_train: Training targets.
        x_test: Validation inputs passed to ``fit`` as ``validation_data``.
        y_test: Validation targets passed to ``fit`` as ``validation_data``.
        batch_size: Batch size for ``fit``.
        epochs: Number of epochs for ``fit``.
        eta: Learning rate for the Adam optimizer.

    Returns:
        Tuple ``(model, history)`` with the fitted model and the object
        returned by ``model.fit``.
    """
    model = build_unet(x_train.shape[1:])

    optimizer = Adam(learning_rate=eta)
    model.compile(
        optimizer=optimizer,
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    fit_options = {
        "batch_size": batch_size,
        "epochs": epochs,
        "validation_data": (x_test, y_test),
        "verbose": 2,
    }
    history = model.fit(x_train, y_train, **fit_options)

    return model, history

## Prepare the data

### Edit these values
These are the directories where your hand-labeled RGB, NIR, and label TIFFs live.

In [None]:
# Edit me!
# Base directory for the RGB, NIR, and labels
data_dir = "../data/"
# Subdirectory for RGB
hand_rgb_dir = data_dir + "train_rgb/"
# Subdirectory for NIR
hand_nir_dir = data_dir + "train_nir/"
# Subdirectory for labels
hand_label_dir = data_dir + "label/"

Patchify the hand-labeled data plus NIR data

In [None]:
# Patchify hand-labeled data PLUS NIR data
# glob returns paths in arbitrary order, so all three lists (including NIR)
# are sorted to keep each RGB tile paired with its own NIR band and label.
patch_rgb = sorted(glob.glob(f"{hand_rgb_dir}*.tif"))
patch_nir = sorted(glob.glob(f"{hand_nir_dir}*.tif"))
patch_label = sorted(glob.glob(f"{hand_label_dir}*.tif"))

print("Patchifying RGB + NIR data...")
data_train, data_label = patch_train_label(
    patch_rgb, patch_label, 128, merge_channel=patch_nir
)

print("\n✅ Done.")

### Edit these values, too

These are the directories that your PRE-PATCHIFIED watershed RGBI and labeled images live in. These images should already be patchified into 512x512 patches before we patchify them *again* into 128x128 patches below.

In [None]:
# Edit me!

# Base directory for the watershed patched RGBI and labels
ws_data_dir = "../data/"
# Subdirectory holding the 512x512 RGBI patches
patched_watershed_rgbi_dir = ws_data_dir + "watershed/512/rgbi/"
# Subdirectory holding the 512x512 label patches
patched_watershed_label_dir = ws_data_dir + "watershed/512/labels/"

Patchify the pre-patchified watershed images and labels, and then combine all the data into `x_train`, `x_test`, `y_train`, and `y_test`.

In [None]:
# Patchify watershed data (pre-patchified)
# Both lists are sorted so that images and labels line up by file name
watershed_rgbi = sorted(glob.glob(f"{patched_watershed_rgbi_dir}*.tif"))
watershed_labels = sorted(glob.glob(f"{patched_watershed_label_dir}*.tif"))

print("Patchifying watershed data...")
data_train_ws, data_label_ws = patch_train_label(watershed_rgbi, watershed_labels, 128)

# Append the watershed patches after the hand-labeled ones (along axis 0)
data_train = np.concatenate((data_train, data_train_ws), axis=0)
data_label = np.concatenate((data_label, data_label_ws), axis=0)

print(
    f"\nSizes after adding watershed data:\n\
data_train: {data_train.shape}\n\
data_label: {data_label.shape}\n"
)

# Shuffle images and labels with one shared, seeded index order
rng = np.random.default_rng(seed=42)
n = len(data_train)
k = rng.choice(n, size=n, replace=False)
data_train, data_label = data_train[k], data_label[k]

# Hold out 10% of the patches as the test set
x_train, x_test, y_train, y_test = train_test_split(
    data_train,
    data_label,
    test_size=0.1,
    random_state=157,
)

print(
    f"\nSizes after splitting data:\n\
x_train: {x_train.shape}\n\
y_train: {y_train.shape}\n\
x_test: {x_test.shape}\n\
y_test: {y_test.shape}"
)

print("\n✅ Done.")

## Run the grid search

### Set the flag to `True` for the one parameter you are testing (leave the other two `False`).

In [None]:
# Selection flags read by the grid-search cell below: the parameter whose flag
# is True is the one that gets swept. If all three stay False, that cell fails
# because no parameter has been selected.
epochs = False  # True -> sweep the number of training epochs
batch_size = False  # True -> sweep the batch size
eta = False  # True -> sweep the Adam learning rate

### And now you can run the hyperparameter grid search (no need to change anything else)

This will take a while (i.e. hours, maybe a day), so only do this when you're ready to leave the computer for a while.

In [None]:
param_names = ["epochs", "batch_size", "eta"]

# Exactly one flag from the previous cell must be True; it names the
# hyperparameter that is swept while the other two stay at their defaults.
# This cell rebinds epochs/batch_size/eta to arrays, so re-run the flag cell
# before running this cell a second time.
selected = [
    name for name, flag in zip(param_names, (epochs, batch_size, eta)) if flag is True
]
if len(selected) != 1:
    raise ValueError(
        "Set exactly one of epochs, batch_size, or eta to True in the previous "
        f"cell (found {len(selected)})."
    )
param_name = selected[0]

# Candidate values for the swept parameter. All arrays are 1-D so that
# iterating/indexing them yields one scalar per grid point.
candidates = {
    "epochs": np.array([10, 25, 50, 100, 150, 200]),
    "batch_size": np.array([8, 16, 32, 64, 128]),
    "eta": np.array([1e-3, 1e-2, 1e-1, 1, 2]),
}
# Fixed values used for the parameters that are not being swept
defaults = {"epochs": 75, "batch_size": 32, "eta": 1e-2}

param = candidates[param_name]

# Every parameter becomes an array of length param.size: the swept one holds
# its candidates, the others repeat their default, so all three can be
# indexed with the same k below.
settings = {
    name: param if name == param_name else np.repeat(defaults[name], param.size)
    for name in param_names
}
epochs, batch_size, eta = (settings[name] for name in param_names)

# Data structure for future grid search data storage
n_folds = 10
data = np.zeros((n_folds, param.size, 5), dtype=object)

# Initialize the KFold
kf = KFold(n_splits=n_folds, shuffle=True, random_state=7)

# %% Run reduced grid search

for i, (itrain, itest) in enumerate(
    tqdm(
        kf.split(
            x_train,
            y_train,
        ),
        desc="K-Folds",
        position=0,
        leave=False,
    )
):
    x_train_fold = x_train[itrain]
    y_train_fold = y_train[itrain]
    x_test_fold = x_train[itest]
    y_test_fold = y_train[itest]

    for k in tqdm(range(param.size), desc=param_name, position=1, leave=False):
        # Run U-Net Here (plain Python scalars rather than NumPy scalars)
        model, history = train_unet(
            x_train_fold,
            y_train_fold,
            x_test_fold,
            y_test_fold,
            int(batch_size[k]),
            int(epochs[k]),
            float(eta[k]),
        )

        # Loss and accuracies from each epoch
        loss = history.history["loss"]
        val_loss = history.history["val_loss"]
        acc = history.history["accuracy"]
        val_acc = history.history["val_accuracy"]

        # Test the model on the preserved test data
        # NOTE(review): the held-out set is scored for every candidate value,
        # so it also takes part in hyperparameter selection — confirm intended.
        y_pred = model.predict(x_test)

        # Convert sigmoid probability to classification
        y_pred_thresholded = y_pred > 0.5

        # Get the IoU for the test data; update_state takes (y_true, y_pred)
        IOU_keras = MeanIoU(num_classes=2)
        IOU_keras.update_state(y_test, y_pred_thresholded)
        iou = IOU_keras.result().numpy()

        # Log the five stats according to their K-Fold and parameter iteration
        stats = [iou, loss, val_loss, acc, val_acc]
        for j, s in enumerate(stats):
            data[i, k, j] = s

# Object-dtype array: load it back with np.load(..., allow_pickle=True)
np.save(f"data_{param_name}.npy", data)