# Modeling

In [1]:
import os
import datetime

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from numba import cuda

from src.model import unet_model
from src.dataset import SegmentationDataset
from src.data_pipeline import SegmentationDataPipeline
from src.model_utils import (
    CustomTensorBoard,
    dice_coeff,
    dice_loss,
    dice_loss_old,
    dice_coeff_old,
    bce_dice_loss,
    tversky,
    tversky_loss,
    tversky_axis,
    tversky_loss_axis,
    focal_tversky_loss,
    evaluate_per_class_dice,
)


%load_ext lab_black
%load_ext autoreload
%autoreload 2

2022-10-19 17:54:42.105723: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-10-19 17:54:45.817895: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/cuda/lib::/usr/lib/hadoop/lib/native
2022-10-19 17:54:45.818275: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/cuda/lib::/usr/lib/hadoop/lib/native


## Load data and create pipeline

In [2]:
IMG_SHAPE = (256, 1600)
EPOCHS = 10
BATCH_SIZE = 8
ANNOTATIONS_PATH = "../data/train.csv"
TRAIN_IMG_PATH = "../data/train_images/"
LOSSES = {
    "dice_loss_old": dice_loss_old,
    "dice_loss": dice_loss,
    "bce_dice_loss": bce_dice_loss,
    "tversky_loss": tversky_loss,
    "tversky_loss_axis": tversky_loss_axis,
    "focal_tversky_loss": focal_tversky_loss,
}
METRICS = {
    "dice_coeff_old": dice_coeff_old,
    "dice_coeff": dice_coeff,
    "tversky": tversky,
    "tversky_axis": tversky_axis,
}
LOG_DIR = (
    f'../logs/test-SAMPLEWEIGHTS-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'
)

# instantiate dataset and pipelne
sd = SegmentationDataset(
    label_file=ANNOTATIONS_PATH,
    img_dir_path=TRAIN_IMG_PATH,
    img_shape=IMG_SHAPE,
)

# create train/test & x/y splits
train_imgs, test_imgs = sd.get_train_test_split(test_size=0.2)

# small sample
train_imgs = train_imgs[:100]
test_imgs = test_imgs[:20]

X_train = sd.get_image_sequence(train_imgs)
y_train = sd.get_label_sequence(train_imgs, label_type="preprocessed")
X_test = sd.get_image_sequence(test_imgs)
y_test = sd.get_label_sequence(test_imgs, label_type="preprocessed")


# create dataset pipelines
sdp = SegmentationDataPipeline(
    img_shape=IMG_SHAPE,
    label_type="preprocessed",
    pipeline_options={
        "map_parallel": None,
        "cache": False,
        "shuffle_buffer_size": False,
        "batch_size": BATCH_SIZE,
        "prefetch": False,
    },
)

# train_dataset = sdp(X_train, y_train, is_train=True)
train_sample_weights = sd.get_sample_weight_sequence(train_imgs)
# train_dataset = sdp(
#     X_train, y_train, is_train=True, sample_weights=train_sample_weights
# )
train_dataset = sdp(X_train, y_train, is_train=True)
test_dataset = sdp(X_test, y_test, is_train=False)

CLASS MAP: {-2: 4.905542544886807, -1: 0.3549079408110245, 1: 2.7238838318162117, 2: 10.741880341880341, 3: 0.44014849057925337, 4: 4.059431524547803}


2022-10-19 17:55:27.396552: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-19 17:55:27.560555: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-19 17:55:27.563554: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-19 17:55:27.570145: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-19 17:55:27.572044: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

AUGMENTING!!
Batching
Batching


In [3]:
# train_dataset_pp = sdp(X_train, y_train, is_train=True)
sample_pp = list(train_dataset.take(1).as_numpy_iterator())
sample_pp[0][0].shape, sample_pp[0][1].shape,  # sample_pp[0][2].shape

((8, 256, 1600, 3), (8, 256, 1600, 5))

In [4]:
sample_pp[0][2].shape

IndexError: tuple index out of range

## Define Model 

In [5]:
EPOCHS = 150
# LOG_DIR = f'../logs/test-SAMPLEWEIGHTS_OLD_DICE_LOSS_CORRECTED_AXIS-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'
LOG_DIR = f'../logs/WITHOUT-SAMPLEWEIGHTS-SMALLSAMPLESIZE-150_TVERSKY_LOSS_AXIS-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}'

# build model
unet = unet_model(IMG_SHAPE, n_channels_bottleneck=512 / 4)

unet.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    # loss=LOSSES["tversky_loss"],
    loss=LOSSES["tversky_loss_axis"],
    # loss=LOSSES["dice_loss"],
    # loss=dice_loss_old_class(),
    metrics=[dice_coeff, tversky, tversky_axis],
)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        os.path.join(LOG_DIR, "best_model.h5"), save_best_only=True
    ),
    tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR, histogram_freq=1),
]

In [6]:
hist = unet.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=test_dataset,
    callbacks=callbacks,
    verbose=2,
)

Epoch 1/150


2022-10-19 17:56:48.385847: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8202
2022-10-19 17:56:49.255524: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-19 17:56:49.256611: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-19 17:56:49.256642: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-10-19 17:56:49.258079: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-19 17:56:49.258155: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


13/13 - 17s - loss: 0.9789 - dice_coeff: 0.0156 - tversky: 0.0246 - tversky_axis: 0.0211 - val_loss: 0.9603 - val_dice_coeff: 0.0235 - val_tversky: 0.0375 - val_tversky_axis: 0.0397 - 17s/epoch - 1s/step
Epoch 2/150
13/13 - 6s - loss: 0.9526 - dice_coeff: 0.0350 - tversky: 0.0550 - tversky_axis: 0.0474 - val_loss: 0.9600 - val_dice_coeff: 0.0256 - val_tversky: 0.0406 - val_tversky_axis: 0.0400 - 6s/epoch - 448ms/step
Epoch 3/150
13/13 - 6s - loss: 0.9284 - dice_coeff: 0.0544 - tversky: 0.0843 - tversky_axis: 0.0716 - val_loss: 0.9618 - val_dice_coeff: 0.0224 - val_tversky: 0.0355 - val_tversky_axis: 0.0382 - 6s/epoch - 466ms/step
Epoch 4/150
13/13 - 6s - loss: 0.8924 - dice_coeff: 0.0767 - tversky: 0.1151 - tversky_axis: 0.1076 - val_loss: 0.9459 - val_dice_coeff: 0.0354 - val_tversky: 0.0512 - val_tversky_axis: 0.0541 - 6s/epoch - 470ms/step
Epoch 5/150
13/13 - 5s - loss: 0.8637 - dice_coeff: 0.1165 - tversky: 0.1643 - tversky_axis: 0.1363 - val_loss: 0.9603 - val_dice_coeff: 0.0235 -

In [7]:
cuda.select_device(0)
cuda.close()

: 

In [7]:
tf.keras.backend.clear_session()
del unet

#### Without class weighting

In [8]:
MODEL_PATH = "logs/test20221017-185537/best_model.h5"
MODEL_PATH = "../logs/test-SAMPLEWEIGHTS-20221018-132737/best_model.h5"
MODEL_PATH = "../logs/test-NOSAMPLEWEIGHTS_OLD_DICE_LOSS_CORRECTED_AXIS_RERUN-20221018-200049/best_model.h5"
MODEL_PATH = "../logs/WITHOUT-SAMPLEWEIGHTS-250_OLD_DICE_LOSS_CORRECTED_AXIS-20221018-204056/best_model.h5"
MODEL_PATH = "../logs/WITH-SAMPLEWEIGHTS-250_OLD_DICE_LOSS_CORRECTED_AXIS-20221018-202001/best_model.h5"
unet_model = tf.keras.models.load_model(MODEL_PATH, custom_objects=(LOSSES | METRICS))

class_scores = evaluate_per_class_dice(train_dataset, unet_model)

  0%|          | 0/2 [00:00<?, ?it/s]



 50%|█████     | 1/2 [00:01<00:01,  1.07s/it]



100%|██████████| 2/2 [00:01<00:00,  1.41it/s]


In [7]:
class_scores

{0: 0.9803041,
 1: 1.331209e-06,
 2: 3.2722528e-06,
 3: 0.5989963,
 4: 1.442533e-05}

In [9]:
class_scores

{0: 0.9759725, 1: 7.5105786e-06, 2: 0.5, 3: 1.1764704e-11, 4: 3.8533142e-11}