In [23]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt

from glob import glob
from PIL import Image

import tensorflow as tf
from sklearn.model_selection import train_test_split

from keras.optimizers import Adam
from keras.metrics import Recall, Precision
from keras import backend as K
from keras.layers import (Input, Conv2D, BatchNormalization, 
                          Activation, MaxPool2D, Conv2DTranspose, 
                          Concatenate)
from keras.models import Model
from keras.layers import LeakyReLU

In [24]:
# Dataset locations (Kaggle input mount).
image_path = "/kaggle/input/computer-vision-for-building-damage-assessment/Dataset/images"
mask_path = "/kaggle/input/computer-vision-for-building-damage-assessment/Dataset/masks"

# Reproducibility: seed the Python, NumPy and TensorFlow RNGs up front so that
# weight initialisation and the random flips used for augmentation are
# repeatable across "Restart & Run All" (as far as the backend allows).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Parameters
H = 256                # image height (pixels) after resizing
W = 256                # image width (pixels) after resizing
BATCH_SIZE = 32        # samples per batch in the tf.data pipelines
LEARNING_RATE = 1e-4   # learning rate passed to Adam
EPOCHS = 10            # number of epochs passed to model.fit

In [25]:
# Tiny additive constant: keeps the IoU ratio defined (and equal to 1) when
# both the target and the thresholded prediction are empty.
smooth = 1e-15
def iou(y_true, y_pred):
    """Hard Intersection-over-Union between a target and a thresholded prediction.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted tensor; values above 0.5 count as foreground.

    Returns:
        A scalar tensor: (intersection + smooth) / (union + smooth), reduced
        over every element of the inputs (the whole batch at once).
    """
    binary_pred = tf.cast(tf.greater(y_pred, 0.5), tf.float32)
    overlap = tf.reduce_sum(tf.multiply(y_true, binary_pred))
    combined = tf.reduce_sum(y_true) + tf.reduce_sum(binary_pred) - overlap
    ratio = (overlap + smooth) / (combined + smooth)
    # Pin the static shape to a scalar so downstream consumers see rank 0.
    return tf.ensure_shape(ratio, shape=())

def soft_dice_coef(y_true, y_pred, smooth=1e-6):
    """Soft (un-thresholded) Dice coefficient.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted tensor, used as-is (no thresholding).
        smooth: Additive constant applied to numerator and denominator; note
            that it shadows the module-level ``smooth`` inside this function.

    Returns:
        (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth),
        with every sum taken over all elements of the inputs.
    """
    overlap = tf.reduce_sum(y_true * y_pred)
    total_mass = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred)
    numerator = 2. * overlap + smooth
    return numerator / (total_mass + smooth)

def soft_dice_loss(y_true, y_pred):
    """Dice loss: one minus the soft Dice coefficient of the two tensors."""
    dice = soft_dice_coef(y_true, y_pred)
    return 1.0 - dice

In [26]:
def load_data(image_file, mask_file, split=0.2):
    """Collect image/mask paths and split them into train/val/test sets.

    Images and masks are paired by position after sorting the ``*.png`` files
    of each directory (assumes matching file names in both folders - verify
    against the dataset layout).

    Args:
        image_file: Directory containing the input ``.png`` images.
        mask_file: Directory containing the corresponding ``.png`` masks.
        split: Fraction held out at each stage. It is first applied to the
            full set to carve out the validation set, then applied again to
            the remainder to carve out the test set.

    Returns:
        ``(train_x, train_y), (val_x, val_y), (test_x, test_y)`` where every
        element is a list of file paths.

    Raises:
        ValueError: If the two directories hold a different number of files,
            since positional pairing would then be wrong.
    """
    images = sorted(glob(image_file + '/*.png'))
    masks = sorted(glob(mask_file + '/*.png'))

    if len(images) != len(masks):
        raise ValueError(
            f"Found {len(images)} images but {len(masks)} masks; "
            "cannot pair them by position."
        )

    # Split images and masks in ONE call so both lists are permuted with the
    # same indices; pairs stay aligned by construction instead of relying on
    # two independent calls happening to shuffle identically.
    train_x, val_x, train_y, val_y = train_test_split(
        images, masks, test_size=split, random_state=42
    )
    train_x, test_x, train_y, test_y = train_test_split(
        train_x, train_y, test_size=split, random_state=42
    )

    return (train_x, train_y), (val_x, val_y), (test_x, test_y)

# Materialise the three (inputs, targets) path splits and report their sizes.
splits = load_data(image_path, mask_path)
(train_x, train_y), (val_x, val_y), (test_x, test_y) = splits
print(f"Train size: {len(train_x)}, Val size: {len(val_x)}, Test size: {len(test_x)}")

Train size: 3582, Val size: 1120, Test size: 896


In [27]:
def read_image(path):
    """Read an image from disk, force 3 channels, resize and scale to [0, 1].

    Args:
        path: Path of the image file, as accepted by ``PIL.Image.open``.

    Returns:
        A float32 array of shape ``(H, W, 3)`` with values in [0, 1], or
        ``None`` if the file could not be read (the error is printed).
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # the pixels have been loaded.
        with Image.open(path) as img:
            # Force RGB so grayscale / palette / RGBA files still produce the
            # 3-channel array that tf_parse declares via set_shape.
            img = img.convert('RGB').resize((W, H))
        x = np.array(img, dtype=np.float32) / 255.0
        return x
    except Exception as e:
        print(f'Error while reading image: {e}')
        return None

def read_mask(path):
    """Read a mask from disk as single-channel, resize and scale to [0, 1].

    Args:
        path: Path of the mask file, as accepted by ``PIL.Image.open``.

    Returns:
        A float32 array of shape ``(H, W, 1)`` with values in [0, 1], or
        ``None`` if the file could not be read (the error is printed).
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # the pixels have been loaded.
        with Image.open(path) as mask:
            # Nearest-neighbour keeps the original label values; an
            # interpolating filter would invent fractional values along
            # object borders, which is wrong for a segmentation target.
            mask = mask.convert('L').resize((W, H), resample=Image.NEAREST)
        x = np.array(mask, dtype=np.float32) / 255.0
        x = np.expand_dims(x, axis=-1)
        return x
    except Exception as e:
        print(f'Error while reading mask: {e}')
        return None

def tf_parse(x, y):
    """Load one (image, mask) pair from inside a tf.data pipeline.

    The Python readers are wrapped with ``tf.numpy_function``; because that
    op returns tensors without static shape information, the shapes are
    re-attached afterwards.

    Args:
        x: Tensor holding the image path.
        y: Tensor holding the mask path.

    Returns:
        ``(image, mask)`` float32 tensors with static shapes ``[H, W, 3]``
        and ``[H, W, 1]``.
    """
    def _load_pair(img_file, msk_file):
        return read_image(img_file), read_mask(msk_file)

    image, mask = tf.numpy_function(_load_pair, [x, y], [tf.float32, tf.float32])
    image.set_shape([H, W, 3])
    mask.set_shape([H, W, 1])
    return image, mask

# Data augmentation built from TensorFlow ops so it can run inside a tf.data pipeline.
def random_flip(x, y):
    """Apply independent random horizontal and vertical flips to a pair.

    Each flip is decided by its own uniform draw (applied when the draw
    exceeds 0.5), and the image and mask always receive the same flip so
    they stay aligned.

    Args:
        x: Image tensor.
        y: Mask tensor.

    Returns:
        The ``(x, y)`` pair after zero, one or both flips.
    """
    def _maybe_flip(flip_fn, image, mask):
        # One draw per flip direction; both tensors share the decision.
        draw = tf.random.uniform(())
        return tf.cond(
            draw > 0.5,
            lambda: (flip_fn(image), flip_fn(mask)),
            lambda: (image, mask),
        )

    x, y = _maybe_flip(tf.image.flip_left_right, x, y)
    x, y = _maybe_flip(tf.image.flip_up_down, x, y)
    return x, y

def tf_dataset(X, Y, batch=BATCH_SIZE, repeat=False, augment=False, shuffle=False):
    """
    Create a batched, prefetched TF dataset from image/mask file paths.

    Args:
        X: List of input image paths.
        Y: List of corresponding mask paths.
        batch: Batch size.
        repeat: Whether to repeat the dataset indefinitely.
        augment: Whether to apply data augmentation (random flips).
        shuffle: Whether to reshuffle the (image, mask) pairs on every pass
            over the data. Defaults to False, which keeps the input order.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, mask_batch)`` tuples.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, Y))

    if shuffle:
        # Shuffle the path pairs BEFORE decoding: the buffer then only holds
        # short strings, so a full-dataset buffer (perfect shuffle) is cheap.
        # max(..., 1) guards against an invalid zero-sized buffer.
        dataset = dataset.shuffle(buffer_size=max(len(X), 1))

    dataset = dataset.map(tf_parse, num_parallel_calls=tf.data.AUTOTUNE)

    if augment:
        dataset = dataset.map(random_flip, num_parallel_calls=tf.data.AUTOTUNE)

    if repeat:
        dataset = dataset.repeat()  # Repeat indefinitely for training

    dataset = dataset.batch(batch)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Training data is augmented and reshuffled every epoch so batches differ
# between epochs; validation data keeps a fixed order and is left untouched.
train_dataset = tf_dataset(train_x, train_y, batch=BATCH_SIZE, repeat=True, augment=True, shuffle=True)
val_dataset   = tf_dataset(val_x, val_y, batch=BATCH_SIZE, repeat=True, augment=False)

# Both datasets repeat forever, so Keras needs an explicit number of batches
# per epoch; ceil() makes sure the final partial batch is counted.
train_steps = np.ceil(len(train_x) / BATCH_SIZE).astype(int)
val_steps   = np.ceil(len(val_x) / BATCH_SIZE).astype(int)

print(f"Train steps per epoch: {train_steps}")
print(f"Validation steps per epoch: {val_steps}")

Train steps per epoch: 112
Validation steps per epoch: 35


In [28]:
def conv_block(x, filters, kernel_size=3, activation=None, padding='same'):
    """(Conv2D -> BatchNorm -> activation) applied twice.

    Args:
        x: Input tensor.
        filters: Number of filters for both convolutions.
        kernel_size: Kernel size for both convolutions.
        activation: Activation applied after each batch normalisation.
            ``None`` (the default) selects ``LeakyReLU(alpha=0.1)``; any
            other value is passed to a Keras ``Activation`` layer.
        padding: Padding mode for both convolutions.

    Returns:
        The output tensor of the second activation.
    """
    for _ in range(2):
        x = Conv2D(filters, kernel_size, padding=padding)(x)
        x = BatchNormalization()(x)
        if activation is None:
            x = LeakyReLU(alpha=0.1)(x)
        else:
            # Honour the caller's choice instead of silently ignoring it.
            x = Activation(activation)(x)
    return x

def encoder_block(x, filters, activation='relu'):
    """Down-sampling stage: conv block followed by 2x2 max pooling.

    Args:
        x: Input tensor.
        filters: Number of filters for the conv block.
        activation: Forwarded to ``conv_block``.

    Returns:
        ``(features, pooled)``: the conv-block output (used as the skip
        connection) and its max-pooled version (input to the next stage).
    """
    features = conv_block(x, filters, activation=activation)
    pooled = MaxPool2D((2, 2))(features)
    return features, pooled

def decoder_block(x, skip_features, filters, activation='relu'):
    """Up-sampling stage: transpose conv, skip concatenation, conv block.

    Args:
        x: Input tensor from the previous (coarser) stage.
        skip_features: Encoder tensor concatenated after up-sampling.
        filters: Number of filters for the transpose conv and the conv block.
        activation: Forwarded to ``conv_block``.

    Returns:
        The output tensor of the conv block.
    """
    upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
    merged = Concatenate()([upsampled, skip_features])
    return conv_block(merged, filters, activation=activation)

def build_beefy_unet(input_shape=(256, 256, 3), activation='relu'):
    """Assemble a 4-level UNet with a 1024-filter bottleneck.

    Args:
        input_shape: Shape of a single input image (without batch axis).
        activation: Forwarded to every encoder, bottleneck and decoder block.

    Returns:
        A Keras ``Model`` mapping the input to a 1-channel sigmoid map.
    """
    inputs = Input(input_shape)

    # Encoder: keep each stage's pre-pooling features for the skip connections.
    skips = []
    x = inputs
    for width in (64, 128, 256, 512):
        skip, x = encoder_block(x, width, activation=activation)
        skips.append(skip)

    # Bottleneck
    x = conv_block(x, 1024, activation=activation)

    # Decoder: consume the skips deepest-first, mirroring the encoder widths.
    for width in (512, 256, 128, 64):
        x = decoder_block(x, skips.pop(), width, activation=activation)

    # Output: 1x1 convolution with sigmoid gives one value in (0, 1) per pixel.
    outputs = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(x)

    return Model(inputs, outputs)

# Instantiate and compile the model.
# activation=None -> conv_block uses its LeakyReLU(alpha=0.1) layers.
model = build_beefy_unet(input_shape=(H, W, 3), activation=None)
model.compile(
    loss=soft_dice_loss,
    optimizer=Adam(LEARNING_RATE),
    # Name the stateful metrics explicitly: unnamed instances get an
    # auto-generated suffix that changes every time this cell is re-run
    # (e.g. `recall_2`), which makes log keys such as `val_recall` unstable.
    metrics=[
        soft_dice_coef,
        iou,
        Recall(name='recall'),
        Precision(name='precision'),
    ],
)

model.summary()

In [29]:
# Sanity check: pull a single batch and show the tensor shapes and dtypes.
for batch_idx, batch in enumerate(train_dataset.take(1)):
    xx, yy = batch
    print("Batch", batch_idx)
    print("X shape:", xx.shape, xx.dtype)
    print("Y shape:", yy.shape, yy.dtype)

Batch 0
X shape: (32, 256, 256, 3) <dtype: 'float32'>
Y shape: (32, 256, 256, 1) <dtype: 'float32'>


In [None]:
# Both callbacks watch `val_loss`: it is always present in the logs under
# exactly that name, unlike metric keys whose names can carry an
# auto-generated suffix. The loss is 1 - Dice, so lower is better and the
# direction is stated explicitly with mode='min'.
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        mode='min',
        factor=0.1,
        patience=2,
        verbose=1
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        # Must be smaller than EPOCHS for early stopping to ever trigger.
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
]

# The datasets repeat indefinitely, so the epoch length is set explicitly
# through steps_per_epoch / validation_steps.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_data=val_dataset,
    validation_steps=val_steps,
    callbacks=callbacks
)

Epoch 1/10
[1m  9/112[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m3:25:55[0m 120s/step - iou: 0.0649 - loss: 0.9026 - precision_2: 0.1352 - recall_2: 0.7285 - soft_dice_coef: 0.0974