In [1]:
import os
import numpy as np
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import (
    Conv2D, Activation, BatchNormalization,
    UpSampling2D, Input, Concatenate
)
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG19
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from src.metrics import (
    cdc, cdc_loss, bahd
)

2024-02-06 21:21:00.366320: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-06 21:21:00.366374: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-06 21:21:00.419952: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-06 21:21:00.536769: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Ask TensorFlow which GPU devices it can see; the returned list is the cell output.
tf.config.list_physical_devices('GPU')

2024-02-06 21:21:06.390126: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-06 21:21:06.608627: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-06 21:21:06.609021: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Reset Keras' global state before any model is built in this session.
tf.keras.backend.clear_session()

In [4]:
# Seed NumPy's and TensorFlow's global random generators with a fixed value.
np.random.seed(42)
tf.random.set_seed(42)

# Hyperparameters

In [5]:
# Side length, in pixels, that images and masks are resized to; also the
# height/width of the model's input layer.
IMAGE_SIZE = 256
# Value passed as `epochs` to `model.fit`.
EPOCHS = 300
# Default batch size of `tf_dataset`; also used to derive the step counts.
BATCH_SIZE = 4
# Learning rate handed to the Nadam optimizer.
LR = 1e-4

# Dataset root given to `load_data`, which globs `images/*` and `masks/*` under it.
PATH = "."

# Dataset

In [6]:
def load_data(path, split=0.1):
    """Collect image/mask paths under ``path`` and split them three ways.

    Files are taken from ``<path>/images/*`` and ``<path>/masks/*``. Each list
    is sorted by name and the two lists are paired by position.

    Args:
        path: Dataset root directory.
        split: Fraction of the whole dataset used for the validation set and,
            separately, for the test set.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y), (test_x, test_y)`` where the
        ``x`` lists hold image paths and the ``y`` lists the mask paths at the
        same positions.

    Raises:
        ValueError: If the number of images and masks differs, since pairing
            by position would then be meaningless.
    """
    images = sorted(glob(os.path.join(path, "images/*")))
    masks = sorted(glob(os.path.join(path, "masks/*")))

    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} images but {len(masks)} masks under {path!r}; "
            "each image needs exactly one mask."
        )

    total_size = len(images)
    valid_size = int(total_size * split)
    test_size = int(total_size * split)

    # Split images and masks in a single call so both lists go through the
    # same shuffle and every image stays aligned with its mask.
    train_x, valid_x, train_y, valid_y = train_test_split(
        images, masks, test_size=valid_size, random_state=42
    )
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=test_size, random_state=42
    )

    return (train_x, train_y), (valid_x, valid_y), (test_x, test_y)

In [7]:
def read_image(path):
    """Load an image file as an RGB array resized and scaled to [0, 1].

    Args:
        path: File path, either ``bytes`` (as delivered by
            ``tf.numpy_function``) or ``str``.

    Returns:
        Floating-point array of shape ``(IMAGE_SIZE, IMAGE_SIZE, 3)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image file: {path}")
    # OpenCV decodes to BGR. Convert to RGB so the network is trained on the
    # same channel order that the Results-section reader feeds it at
    # prediction time.
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
    x = cv2.resize(x, (IMAGE_SIZE, IMAGE_SIZE))
    x = x / 255.0

    return x

def read_mask(path):
    """Load a mask file as a single-channel array resized and scaled to [0, 1].

    Args:
        path: File path, either ``bytes`` (as delivered by
            ``tf.numpy_function``) or ``str``.

    Returns:
        Floating-point array of shape ``(IMAGE_SIZE, IMAGE_SIZE, 1)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read mask file: {path}")
    x = cv2.resize(x, (IMAGE_SIZE, IMAGE_SIZE))
    x = x / 255.0
    # Append a channel axis so the mask has the same rank as the model output.
    x = np.expand_dims(x, axis=-1)

    return x

## Building the tf.data pipeline

In [8]:
def tf_parse(x, y):
    """Turn an (image path, mask path) pair of tensors into decoded tensors.

    The file reading is done by ``read_image`` / ``read_mask`` through
    ``tf.numpy_function``.

    Args:
        x: Tensor holding the image path.
        y: Tensor holding the mask path.

    Returns:
        ``(image, mask)`` as ``tf.float64`` tensors with static shapes
        ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` and ``(IMAGE_SIZE, IMAGE_SIZE, 1)``.
    """
    def _load_pair(image_path, mask_path):
        # Executed as ordinary Python/NumPy code by tf.numpy_function.
        return read_image(image_path), read_mask(mask_path)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float64, tf.float64])

    # Declare the static shapes explicitly on the numpy_function outputs.
    image.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    mask.set_shape([IMAGE_SIZE, IMAGE_SIZE, 1])
    return image, mask


def tf_dataset(x, y, batch=BATCH_SIZE):
    """Build a repeating, batched ``tf.data`` pipeline from path lists.

    Args:
        x: Sequence of image paths.
        y: Sequence of mask paths, aligned with ``x``.
        batch: Number of samples per batch.

    Returns:
        A ``tf.data.Dataset`` that maps every path pair through ``tf_parse``,
        groups the results into batches and then repeats without a count.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x, y))
        .map(tf_parse)
        .batch(batch)
        .repeat()
    )

In [9]:
# Split the dataset found under PATH and report the size of each subset.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = load_data(PATH)

for label, subset in (
    ("Training data: ", train_x),
    ("Validation data: ", valid_x),
    ("Testing data: ", test_x),
):
    print(label, len(subset))

Training data:  264
Validation data:  33
Testing data:  33


In [10]:
def read_and_rgb(x):
    """Read the image file at ``x`` with OpenCV and reorder its channels BGR -> RGB."""
    bgr = cv2.imread(x)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

# Model

In [11]:
#inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
#encoder = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=0.35)

In [12]:
#encoder.summary()

In [13]:
#inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
#encoder2 = VGG19(input_tensor=inputs, weights="imagenet", include_top=False)

In [14]:
#encoder2.summary()

In [15]:
def model():
    """Build a U-Net-style segmentation network on top of a VGG19 encoder.

    The encoder is VGG19 with ImageNet weights and no classification head.
    The decoder starts from ``block5_pool`` and, for every stage, upsamples by
    2, concatenates one encoder feature map (deepest first, ending with the
    raw input image) and applies two 3x3 Conv -> BatchNorm -> ReLU layers.
    A final 1x1 convolution followed by a sigmoid yields one output channel.

    Returns:
        A Keras ``Model`` taking ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` inputs.
    """
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
    encoder = VGG19(input_tensor=inputs, weights="imagenet", include_top=False)

    # Encoder layers used as skip connections, shallowest first, each paired
    # by position with the filter count of the decoder stage that consumes it.
    skip_names = ["input_image", "block1_pool", "block2_pool", "block3_pool", "block4_pool"]
    stage_filters = [16, 32, 48, 64, 80]

    def conv_bn_relu(tensor, filters):
        # One 3x3 convolution followed by batch normalisation and ReLU.
        tensor = Conv2D(filters, (3, 3), padding="same")(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation("relu")(tensor)

    x = encoder.get_layer("block5_pool").output
    # Walk the skip connections from the deepest to the shallowest.
    for skip_name, filters in zip(reversed(skip_names), reversed(stage_filters)):
        skip = encoder.get_layer(skip_name).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, skip])
        x = conv_bn_relu(x, filters)
        x = conv_bn_relu(x, filters)

    x = Conv2D(1, (1, 1), padding="same")(x)
    outputs = Activation("sigmoid")(x)

    return Model(inputs, outputs)

In [16]:
# Build the network and print its layer summary.
# NOTE(review): this rebinds the name `model` from the factory function to the
# built instance, so running this cell a second time calls the instance rather
# than the factory unless the cell defining `model()` is re-run first.
model = model()
model.summary()

2024-02-06 21:21:06.849824: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-06 21:21:06.851463: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-06 21:21:06.852167: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_image (InputLayer)    [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 block1_conv1 (Conv2D)       (None, 256, 256, 64)         1792      ['input_image[0][0]']         
                                                                                                  
 block1_conv2 (Conv2D)       (None, 256, 256, 64)         36928     ['block1_conv1[0][0]']        
                                                                                                  
 block1_pool (MaxPooling2D)  (None, 128, 128, 64)         0         ['block1_conv2[0][0]']        
                                                                                              

# Metrics

In [17]:
# Tiny constant added to both the numerator and the denominator of the Dice ratio.
smooth = 1e-15

def dice_coef(y_true, y_pred):
    """Dice coefficient between ``y_true`` and ``y_pred``.

    Both inputs are flattened per sample and all sums run over every element,
    so a single scalar is produced for the whole batch.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor.

    Returns:
        ``(2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)``.
    """
    true_flat, pred_flat = (
        tf.keras.layers.Flatten()(tensor) for tensor in (y_true, y_pred)
    )
    overlap = tf.reduce_sum(true_flat * pred_flat)
    total = tf.reduce_sum(true_flat) + tf.reduce_sum(pred_flat)
    return (2. * overlap + smooth) / (total + smooth)

def dice_loss(y_true, y_pred):
    """Dice loss: one minus ``dice_coef`` of the same arguments."""
    score = dice_coef(y_true, y_pred)
    return 1.0 - score

# Training

In [18]:
# Wrap the training and validation path lists in batched tf.data pipelines.
# `tf_dataset` ends with `.repeat()`, so both datasets repeat indefinitely and
# rely on explicit step counts at fit time.
train_dataset = tf_dataset(train_x, train_y, batch=BATCH_SIZE)
valid_dataset = tf_dataset(valid_x, valid_y, batch=BATCH_SIZE)

In [19]:
# Nadam optimizer at learning rate LR; loss and metrics are imported from src.metrics.
opt = tf.keras.optimizers.Nadam(LR)
metrics = [cdc, bahd]
# NOTE(review): the recorded training run fails with an XLA out-of-memory error
# on a `Sum` over f32[262144, 262144] (262144 = BATCH_SIZE * IMAGE_SIZE**2).
# That 2-D shape does not match any layer output listed in the same report, so
# it presumably comes from `cdc_loss`, `cdc` or `bahd` -- confirm in src/metrics.
model.compile(loss=cdc_loss, optimizer=opt, metrics=metrics)

In [20]:
# Both callbacks monitor the validation loss: the learning rate is multiplied
# by 0.1 after 4 epochs without improvement, and training stops after 10 such
# epochs with the best weights restored.
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
]

In [21]:
# The datasets repeat forever, so Keras needs explicit step counts. Ceiling
# division makes a trailing partial batch count as one more step.
train_steps = (len(train_x) + BATCH_SIZE - 1) // BATCH_SIZE
valid_steps = (len(valid_x) + BATCH_SIZE - 1) // BATCH_SIZE

model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_data=valid_dataset,
    validation_steps=valid_steps,
    callbacks=callbacks,
)

Epoch 1/300


2024-02-06 21:21:22.303478: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fe630094b00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-02-06 21:21:22.303643: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce 920MX, Compute Capability 5.0
2024-02-06 21:21:25.389771: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-02-06 21:21:27.925066: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2024-02-06 21:21:27.984845: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-02-06 21:21:31.800895: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.14GiB with freed_by_count=0. The caller indicates that this 

ResourceExhaustedError: Graph execution error:

Detected at node cluster_1_1/xla_run defined at (most recent call last):
<stack traces unavailable>
Out of memory while trying to allocate 549755832000 bytes.
BufferAssignment OOM Debugging.
BufferAssignment stats:
             parameter allocation:   88.89MiB
              constant allocation:        52B
        maybe_live_out allocation:  700.11MiB
     preallocated temp allocation:  512.00GiB
  preallocated temp fragmentation:         0B (0.00%)
                 total allocation:  512.69GiB
              total fragmentation:   44.26MiB (0.01%)
Peak buffers:
	Buffer 1:
		Size: 256.00GiB
		Operator: op_type="Sum" op_name="Sum_8" source_file="dummy_file_name" source_line=10
		XLA Label: fusion
		Shape: f32[262144,262144]
		==========================

	Buffer 2:
		Size: 256.00GiB
		Operator: op_type="Sum" op_name="Sum_8" source_file="dummy_file_name" source_line=10
		XLA Label: fusion
		Shape: f32[262144,262144]
		==========================

	Buffer 3:
		Size: 64.00MiB
		Operator: op_type="Conv2D" op_name="model/block1_conv2/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,64,256,256]
		==========================

	Buffer 4:
		Size: 64.00MiB
		Operator: op_type="Conv2D" op_name="model/block1_conv1/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,64,256,256]
		==========================

	Buffer 5:
		Size: 35.00MiB
		Operator: op_type="ConcatV2" op_name="model/concatenate_4/concat" source_file="dummy_file_name" source_line=10
		XLA Label: fusion
		Shape: f32[4,256,256,35]
		==========================

	Buffer 6:
		Size: 35.00MiB
		Operator: op_type="ConcatV2" op_name="model/concatenate_4/concat" source_file="dummy_file_name" source_line=10
		XLA Label: fusion
		Shape: f32[4,35,256,256]
		==========================

	Buffer 7:
		Size: 32.00MiB
		Operator: op_name="XLA_Retvals"
		XLA Label: fusion
		Shape: f32[4,256,256,32]
		==========================

	Buffer 8:
		Size: 32.00MiB
		Operator: op_type="Conv2D" op_name="model/block2_conv2/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,128,128,128]
		==========================

	Buffer 9:
		Size: 32.00MiB
		Operator: op_type="Conv2D" op_name="model/block2_conv1/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,128,128,128]
		==========================

	Buffer 10:
		Size: 28.00MiB
		Operator: op_type="ConcatV2" op_name="model/concatenate_3/concat" source_file="dummy_file_name" source_line=10
		XLA Label: fusion
		Shape: f32[4,112,128,128]
		==========================

	Buffer 11:
		Size: 16.00MiB
		Operator: op_type="Relu" op_name="model/activation_8/Relu" deduplicated_name="fusion.56"
		XLA Label: fusion
		Shape: f32[4,16,256,256]
		==========================

	Buffer 12:
		Size: 16.00MiB
		Operator: op_type="Conv2D" op_name="model/conv2d_8/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,16,256,256]
		==========================

	Buffer 13:
		Size: 16.00MiB
		Operator: op_type="Conv2D" op_name="model/block3_conv4/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,256,64,64]
		==========================

	Buffer 14:
		Size: 16.00MiB
		Operator: op_type="Conv2D" op_name="model/block3_conv3/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,256,64,64]
		==========================

	Buffer 15:
		Size: 16.00MiB
		Operator: op_type="Conv2D" op_name="model/block3_conv2/Conv2D" source_file="dummy_file_name" source_line=10
		XLA Label: custom-call
		Shape: f32[4,256,64,64]
		==========================


	 [[{{node cluster_1_1/xla_run}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_12105]

In [None]:
# Evaluate on the held-out test split. The dataset repeats forever, so the
# number of batches is given explicitly (ceiling division keeps the last
# partial batch).
test_dataset = tf_dataset(test_x, test_y, batch=BATCH_SIZE)
test_steps = (len(test_x) + BATCH_SIZE - 1) // BATCH_SIZE

model.evaluate(test_dataset, steps=test_steps)

# Results

In [None]:
def read_image(path):
    """Load an image file as an RGB array resized and scaled to [0, 1].

    This definition replaces the earlier ``read_image`` in the notebook
    namespace. ``tf_parse`` looks the name up at call time, so ``bytes`` paths
    coming from ``tf.numpy_function`` are accepted here as well as ``str``.

    Args:
        path: File path as ``str`` or ``bytes``.

    Returns:
        Floating-point array of shape ``(IMAGE_SIZE, IMAGE_SIZE, 3)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_COLOR)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image file: {path}")
    # OpenCV decodes to BGR; matplotlib expects RGB.
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
    x = cv2.resize(x, (IMAGE_SIZE, IMAGE_SIZE))
    x = x/255.0
    return x

def read_mask(path):
    """Load a mask file as a single-channel array resized and scaled to [0, 1].

    This definition replaces the earlier ``read_mask`` in the notebook
    namespace. ``tf_parse`` looks the name up at call time, so ``bytes`` paths
    coming from ``tf.numpy_function`` are accepted here as well as ``str``.

    Args:
        path: File path as ``str`` or ``bytes``.

    Returns:
        Floating-point array of shape ``(IMAGE_SIZE, IMAGE_SIZE, 1)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    if isinstance(path, bytes):
        path = path.decode()
    x = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if x is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read mask file: {path}")
    x = cv2.resize(x, (IMAGE_SIZE, IMAGE_SIZE))
    # Append a channel axis before scaling.
    x = np.expand_dims(x, axis=-1)
    x = x/255.0
    return x

In [None]:
def mask_parse(mask):
    """Replicate a single-channel mask into three identical channels.

    Args:
        mask: Array that squeezes down to two dimensions ``(H, W)``.

    Returns:
        Array of shape ``(H, W, 3)`` whose three channels all equal the
        squeezed mask; the dtype of the input is kept.
    """
    plane = np.squeeze(mask)
    # Stack three copies along a new leading axis, then move that axis last.
    return np.stack((plane, plane, plane)).transpose(1, 2, 0)

In [None]:
# For the first ten test samples, show side by side:
# input image | ground-truth mask | prediction thresholded at 0.5.
for image_path, mask_path in zip(test_x[:10], test_y[:10]):
    rgb = read_image(image_path)
    truth = read_mask(mask_path)
    predicted = model.predict(rgb[np.newaxis, ...])[0] > 0.5

    # 10-pixel-wide white strip separating the three panels.
    height = rgb.shape[0]
    separator = np.ones((height, 10, 3))

    panel = np.concatenate(
        [rgb, separator, mask_parse(truth), separator, mask_parse(predicted)],
        axis=1,
    )

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(panel)