In [4]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import numpy as np
import tensorflow as tf

from src.model import unet_model
from src.dataset import SegmentationDataset
from src.data_pipeline import SegmentationDataPipeline
from src.data_utils import plot_sample_batch

In [152]:
def dice_coeff(y_true, y_pred, epsilon=1e-6):
    """
    Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions.
    Assumes the `channels_last` format.

    Args:
        y_true: b x X x Y x c One hot encoding of ground truth
        y_pred: b x X x Y x c Network output, must sum to 1 over c channel (such as after softmax)
        epsilon: Used for numerical stability to avoid divide by zero errors
    """
    axes = tuple(range(1, 3))
    numerator = 2.0 * tf.reduce_sum((y_pred * y_true), axis=axes)
    denominator = tf.reduce_sum(y_pred + y_true, axis=axes)

    return tf.reduce_mean((numerator + epsilon) / (denominator + epsilon))


def dice_loss(y_true, y_pred, epsilon=1e-6):
    return 1 - dice_coeff(y_true, y_pred, epsilon)

In [6]:
IMG_SHAPE = (256, 1600)
BATCH_SIZE = 8
ANNOTATIONS_PATH = "../data/train.csv"
TRAIN_IMG_PATH = "../data/train_images/"

# instantiate dataset and pipelne
sd = SegmentationDataset(
    label_file=ANNOTATIONS_PATH,
    img_dir_path=TRAIN_IMG_PATH,
    img_shape=IMG_SHAPE,
)

In [149]:
# create train/test & x/y splits
train_imgs, test_imgs = sd.get_train_test_split(test_size=0.2)

X_train = sd.get_image_sequence(train_imgs)
y_train = sd.get_label_sequence(train_imgs, label_type="preprocessed")
X_test = sd.get_image_sequence(test_imgs)
y_test = sd.get_label_sequence(test_imgs, label_type="preprocessed")

sdp = SegmentationDataPipeline(
    img_shape=IMG_SHAPE,
    label_type="preprocessed",
    pipeline_options={
        "map_parallel": None,  # off if None
        "cache": False,
        "shuffle_buffer_size": False,  # off if False
        "batch_size": 8,
        "prefetch": False,  # off if False
    },
)

# create dataset pipelines
train_dataset_pp = sdp(X_train, y_train)
test_dataset_pp = sdp(X_test, y_test)

Batching
Batching


In [150]:
sample_pp = list(train_dataset_pp.take(1).as_numpy_iterator())

In [151]:
sample_pp[0][0].shape, sample_pp[0][1].shape

((8, 256, 1600, 3), (8, 256, 1600, 5))

### Load up model

In [None]:
MODEL_PATH = "../logs/20221010-164546/best_model.h5"

unet_model = tf.keras.models.load_model(
    MODEL_PATH,
    custom_objects={"dice_loss": dice_loss, "dice_coeff": dice_coeff},
)

### Debug loss function

### Build inline channel expansion

In [18]:
img_seq = X_train[:3]
img_seq = [path.split("/")[-1] for path in img_seq]
label_seq = sd.get_label_sequence(img_seq, label_type="preprocessed")

In [21]:
label_seq

['../data/mask_labels/8bdf6cee5.png',
 '../data/mask_labels/43438f903.png',
 '../data/mask_labels/f4c5a6321.png']

In [132]:
label_ds = (
    tf.data.Dataset.from_tensor_slices(label_seq)
    .map(sdp.load_image)
    .map(tf_add_background_channel)
)

In [133]:
sample = list(label_ds.take(1).as_numpy_iterator())

In [135]:
sample[0].shape

(256, 1600, 5)

In [131]:
def add_background_channel(mask, max_value=255.0):
    """
    Prepends an additional channel to a mask label.

    The additional channel assumes a value of `max_value` for each
    pixel location that doesn't have a `max_value` in any of the existing
    channels.

    """

    missing_pixels = np.sum(mask, axis=-1)

    where_0 = np.where(missing_pixels == 0.0)
    where_1 = np.where(missing_pixels == max_value)

    missing_pixels[where_0] = max_value
    missing_pixels[where_1] = 0.0

    missing_pixels = np.expand_dims(missing_pixels, axis=-1)
    mask = np.concatenate((missing_pixels, mask), axis=-1)

    return mask


def tf_add_background_channel(mask):

    mask = tf.py_function(
        func=add_background_channel,
        inp=[mask],
        Tout=[tf.float32],
    )

    return mask[0]

In [66]:
t = add_background_channel(sample[0])

In [90]:
missing_pixels = np.sum(sample[0], axis=-1)

In [95]:
np.where(missing_pixels == 0.0)[0].shape

(406419,)

In [92]:
missing_pixels.shape

(256, 1600)

In [70]:
t[0].dtype

dtype('int64')

In [82]:
def add_background_channel(mask, max_value=255.0):
    """
    Prepends an additional channel to a mask label.

    The additional channel assumes a value of `max_value` for each
    pixel location that doesn't have a `max_value` in any of the existing
    channels.

    """

    missing_pixels = tf.reduce_sum(mask, axis=-1)

    where_0 = tf.cast(tf.where(missing_pixels == 0.0), tf.int64)
    where_1 = tf.cast(tf.where(missing_pixels == max_value), tf.int64)

    #     missing_pixels[where_0] = max_value
    #     missing_pixels[where_1] = 0.0

    #     missing_pixels = tf.expand_dims(missing_pixels, axis=-1)
    #     mask = tf.concatenate((missing_pixels, mask), axis=-1)

    #     return mask
    return where_0

In [86]:
add_background_channel(sample[0])

<tf.Tensor: shape=(406419, 2), dtype=int64, numpy=
array([[   0,    0],
       [   0,    1],
       [   0,    2],
       ...,
       [ 255, 1597],
       [ 255, 1598],
       [ 255, 1599]])>

In [96]:
missing_pixels = tf.reduce_sum(sample[0], axis=-1)

In [97]:
missing_pixels

<tf.Tensor: shape=(256, 1600), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [106]:
where = tf.where(missing_pixels == 255.0)

In [109]:
where

<tf.Tensor: shape=(3181, 2), dtype=int64, numpy=
array([[ 151, 1288],
       [ 151, 1289],
       [ 151, 1290],
       ...,
       [ 221, 1318],
       [ 222, 1317],
       [ 222, 1318]])>

In [107]:
where[..., 0]

<tf.Tensor: shape=(3181,), dtype=int64, numpy=array([151, 151, 151, ..., 221, 222, 222])>

In [108]:
where[..., 1]

<tf.Tensor: shape=(3181,), dtype=int64, numpy=array([1288, 1289, 1290, ..., 1318, 1317, 1318])>

In [None]:
missing_pixels

In [54]:
missing_pixels = tf.reduce_sum(sample[0], axis=-1)

In [55]:
tf.conmissing_pixels

<tf.Tensor: shape=(256, 1600), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [56]:
tf.cast(missing_pixels, tf.int16)

<tf.Tensor: shape=(256, 1600), dtype=int16, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int16)>

In [51]:
tf.reduce_sum(sample[0], axis=-1)

<tf.Tensor: shape=(256, 1600), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [None]:
tf.where(

In [None]:
tf.expand_dims(

In [None]:
tf.concat(

In [50]:
sample[0].shape

(256, 1600, 4)

In [48]:
np.sum(sample[0], axis=-1).shape

(256, 1600)

In [8]:
y_true = sd.prepare_mask_label(label_seq[0], one_hot=True)

In [10]:
y_true.shape

(256, 1600, 5)

In [None]:
#         # create "background" channel and add to mask
#         missing_pixels = np.sum(mask, axis=-1)

#         where_0 = np.where(missing_pixels == 0.0)
#         where_1 = np.where(missing_pixels == 1.0)

#         missing_pixels[where_0] = 1.0
#         missing_pixels[where_1] = 0.0

#         missing_pixels = np.expand_dims(missing_pixels, axis=-1)
#         mask = np.concatenate((missing_pixels, mask), axis=-1)