# Modeling

In [13]:
import os
import datetime

import tensorflow as tf
from tensorflow.keras import layers

from src.model import unet_model
from src.dataset import SegmentationDataset
from src.data_pipeline import SegmentationDataPipeline
from src.model_utils import (
    CustomTensorBoard,
    dice_coeff,
    dice_loss,
    bce_dice_loss,
    tversky,
    tversky_loss,
    focal_tversky_loss,
    evaluate_per_class_dice,
)


%load_ext lab_black
%load_ext autoreload
%autoreload 2

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load data and create pipeline

In [15]:
IMG_SHAPE = (256, 1600)
EPOCHS = 10
BATCH_SIZE = 8
ANNOTATIONS_PATH = "../data/train.csv"
TRAIN_IMG_PATH = "../data/train_images/"
LOSSES = {
    "dice_loss": dice_loss,
    "bce_dice_loss": bce_dice_loss,
    "tversky_loss": tversky_loss,
    "focal_tversky_loss": focal_tversky_loss,
}
METRICS = {
    "dice_coeff": dice_coeff,
    "tversky": tversky,
}
LOG_DIR = f'logs/test{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'

# instantiate dataset and pipelne
# sd = SegmentationDataset(
#     label_file=ANNOTATIONS_PATH,
#     img_dir_path=TRAIN_IMG_PATH,
#     img_shape=IMG_SHAPE,
# )

# create train/test & x/y splits
train_imgs, test_imgs = sd.get_train_test_split(test_size=0.2)

# small sample
train_imgs = train_imgs[:16]
test_imgs = test_imgs[:8]

X_train = sd.get_image_sequence(train_imgs)
y_train = sd.get_label_sequence(train_imgs, label_type="preprocessed")
X_test = sd.get_image_sequence(test_imgs)
y_test = sd.get_label_sequence(test_imgs, label_type="preprocessed")

# create dataset pipelines
sdp = SegmentationDataPipeline(
    img_shape=IMG_SHAPE,
    label_type="preprocessed",
    pipeline_options={
        "map_parallel": None,
        "cache": False,
        "shuffle_buffer_size": False,
        "batch_size": BATCH_SIZE,
        "prefetch": False,
    },
)

train_dataset = sdp(X_train, y_train, is_train=True)
test_dataset = sdp(X_test, y_test, is_train=False)

AUGMENTING!!
Batching
Batching


## Define Model 

In [11]:
# build model
unet = unet_model(IMG_SHAPE, n_channels_bottleneck=512 / 4)

unet.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=LOSSES["tversky_loss"],
    metrics=[dice_coeff, tversky],
)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        os.path.join(LOG_DIR, "best_model.h5"), save_best_only=True
    ),
    tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR, histogram_freq=1),
]

In [19]:
class_weight = {0: 1.0, 1: 4.0, 2: 4.0, 3: 1.0, 4: 4.0}

In [20]:
hist = unet.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=test_dataset,
    callbacks=callbacks,
    class_weight=class_weight,
)

TypeError: '>' not supported between instances of 'NoneType' and 'int'

: 

#### Without class weighting

In [16]:
MODEL_PATH = "logs/test20221017-185537/best_model.h5"
unet_model = tf.keras.models.load_model(MODEL_PATH, custom_objects=(LOSSES | METRICS))

class_scores = evaluate_per_class_dice(test_dataset, unet_model)

  0%|          | 0/1 [00:00<?, ?it/s]



100%|██████████| 1/1 [00:01<00:00,  1.55s/it]


In [17]:
class_scores

{0: 3.6770607e-06,
 1: 4.7923804e-10,
 2: 1.8157777e-10,
 3: 0.018686756,
 4: 4.8614193e-11}