# Artificial Neural Networks and Deep Learning

---

## Homework 2: Minimal Working Example




Reference: https://github.com/bnsreenu/python_for_microscopists/blob/master/224_225_226_models.py

## 🌐 Connect Colab to Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive at /gdrive, then move into the homework folder.
drive.mount("/gdrive")
%cd /gdrive/My Drive/[2024-2025] AN2DL/Homework 2

Mounted at /gdrive
/gdrive/My Drive/[2024-2025] AN2DL/Homework 2


## ⚙️ Import Libraries

In [None]:
import os
from datetime import datetime

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl

import matplotlib.pyplot as plt
%matplotlib inline

import hashlib
# Seed NumPy and TensorFlow with the same fixed value; `seed` is reused later
# as the random_state of the train/validation split.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Report library versions and the number of GPUs TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tfk.__version__}")
print(f"GPU devices: {len(tf.config.list_physical_devices('GPU'))}")

TensorFlow version: 2.17.1
Keras version: 3.5.0
GPU devices: 0


In [None]:
## U-net imports

import os
import math
from PIL import Image
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Configure plot display settings: scale seaborn fonts by 1.4, use the
# 'white' seaborn style, and set the default matplotlib font size to 14.
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)

from tensorflow.keras import models, layers, regularizers
from tensorflow.keras import backend as K

## Data Exploration Functions


In [None]:
def plot_highest_error_iou(model, X_val, y_val, num_classes, top_n=10):
    """
    Plots the images with the highest IoU error (lowest IoU scores).

    The score used here is a single foreground score per image: pixels count
    as intersection when the prediction matches a non-zero ground-truth label,
    and as union when either the ground truth or the prediction is non-zero.
    Images with an empty union score 0.0.

    Parameters:
        model (tf.keras.Model): The trained segmentation model.
        X_val (numpy.ndarray): Validation images of shape (N, H, W, C).
        y_val (numpy.ndarray): Ground truth masks of shape (N, H, W).
        num_classes (int): The number of segmentation classes; used to give the
            true and predicted masks the same colour scale.
        top_n (int): The number of images with highest error to plot. Values
            larger than the number of images are clamped.
    """
    iou_scores = []
    predictions = []

    for image, true_mask in zip(X_val, y_val):
        # Predict a batch of one and drop the batch dimension again
        pred_mask = model.predict(image[np.newaxis, ...], verbose=0)  # (1, H, W, num_classes)
        pred_mask = np.argmax(pred_mask[0], axis=-1)  # (H, W)
        predictions.append(pred_mask)

        y_true_flat = np.asarray(true_mask).flatten()
        y_pred_flat = pred_mask.flatten()

        # Foreground IoU: background (label 0) never counts as a hit
        intersection = np.sum((y_true_flat == y_pred_flat) & (y_true_flat > 0))
        union = np.sum((y_true_flat > 0) | (y_pred_flat > 0))
        iou_scores.append(intersection / union if union > 0 else 0.0)

    # Never ask for more rows than there are images
    n_rows = min(top_n, len(iou_scores))
    if n_rows <= 0:
        return

    lowest_iou_indices = np.argsort(iou_scores)[:n_rows]

    # squeeze=False keeps `axes` two-dimensional even when a single row is plotted
    _, axes = plt.subplots(n_rows, 3, figsize=(15, 5 * n_rows), squeeze=False)
    for row, idx in enumerate(lowest_iou_indices):
        # Input image
        axes[row, 0].imshow(X_val[idx].squeeze(), cmap="gray")
        axes[row, 0].set_title(f"Image (IoU: {iou_scores[idx]:.2f})")
        axes[row, 0].axis("off")

        # Ground truth; fixed vmin/vmax so both masks share one colour per class
        axes[row, 1].imshow(y_val[idx], cmap="nipy_spectral", vmin=0, vmax=num_classes - 1)
        axes[row, 1].set_title("True Mask")
        axes[row, 1].axis("off")

        # Prediction
        axes[row, 2].imshow(predictions[idx], cmap="nipy_spectral", vmin=0, vmax=num_classes - 1)
        axes[row, 2].set_title("Predicted Mask")
        axes[row, 2].axis("off")

    plt.tight_layout()
    plt.show()


In [None]:
def plot_image_mask_pairs(X, y, num_pairs=5):
    """
    Show randomly chosen images next to their masks, one pair per row.

    Parameters:
        X: Indexable collection of images.
        y: Indexable collection of masks, aligned with X.
        num_pairs (int): Maximum number of pairs to draw; capped at len(X).
    """
    n_rows = min(num_pairs, len(X))
    plt.figure(figsize=(10, 4 * n_rows))

    # Sample distinct positions so no pair is shown twice
    chosen = np.random.choice(len(X), n_rows, replace=False)
    for row, i in enumerate(chosen):
        panels = (
            (X[i], {}, f"Image {i+1}"),
            # Fixed colour range so every mask uses the same colour per label
            (y[i], {"cmap": 'nipy_spectral', "vmin": 0, "vmax": 4}, f"Mask {i+1}"),
        )
        for col, (picture, imshow_kwargs, title) in enumerate(panels, start=1):
            plt.subplot(n_rows, 2, 2 * row + col)
            plt.imshow(picture, **imshow_kwargs)
            plt.title(title)
            plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
import numpy as np

def compute_class_distribution(y_train, num_classes=5):
    """
    Return, for each class id, the share of pixels carrying that label.

    Parameters:
    - y_train (numpy array): Label masks; every element is treated as one pixel.
    - num_classes (int): Class ids 0 .. num_classes - 1 are counted (default 5).

    Returns:
    - percentages (list): One percentage (0-100) per class id, in id order.
    """
    labels = y_train.ravel()
    n_pixels = labels.size

    percentages = []
    for class_id in range(num_classes):
        hits = np.sum(labels == class_id)
        percentages.append(hits / n_pixels * 100)

    return percentages

In [None]:
import numpy as np

def flip_dataset(X_train, y_train):
    """
    Quadruple a dataset with mirrored copies of every image and mask.

    The output is ordered as: originals, horizontally flipped, vertically
    flipped, flipped along both axes. Images and masks get the same treatment,
    so they stay aligned.

    Args:
        X_train (numpy.ndarray): Training images of shape (n_samples, 64, 128).
        y_train (numpy.ndarray): Corresponding masks of shape (n_samples, 64, 128).

    Returns:
        augmented_X (numpy.ndarray): Augmented training images.
        augmented_y (numpy.ndarray): Augmented training masks.
    """
    def _with_flips(batch):
        # Axis 2 is the width: mirror left/right and stack under the originals
        mirrored = np.flip(batch, axis=2)
        widened = np.concatenate((batch, mirrored), axis=0)
        # Axis 1 is the height: mirror everything collected so far top/bottom
        upside_down = np.flip(widened, axis=1)
        return np.concatenate((widened, upside_down), axis=0)

    return _with_flips(X_train), _with_flips(y_train)


## Data Cleanup Functions


In [None]:
def find_duplicate_masks(labels):
    """
    Group masks whose raw bytes are identical.

    Each mask is fingerprinted with the MD5 digest of `mask.tobytes()`.

    Args:
        labels: Iterable of arrays exposing `tobytes()`.

    Returns:
        dict: Maps the index of the first occurrence of a mask to the list of
        later indices holding the same mask. Masks that occur once are absent.
    """
    first_seen = {}
    duplicates = {}

    for position, mask in enumerate(labels):
        digest = hashlib.md5(mask.tobytes()).hexdigest()
        # setdefault records this position only if the digest is new
        original = first_seen.setdefault(digest, position)
        if original != position:
            duplicates.setdefault(original, []).append(position)

    return duplicates

In [None]:
def remove_alien_elements(input_dict, keys_to_remove):
    """Return a new dict holding the entries of `input_dict` whose key is not in `keys_to_remove`."""
    return {
        key: value
        for key, value in input_dict.items()
        if key not in keys_to_remove
    }

In [None]:
pip install albumentations opencv-python



In [None]:
import albumentations as A
from albumentations.core.composition import OneOf
from albumentations.core.transforms_interface import ImageOnlyTransform
from albumentations.augmentations.transforms import *
import cv2
from albumentations import Compose


import numpy as np
def augment_image_and_mask(image, mask, augmentations):
    """
    Run one augmentation pipeline on an image/mask pair.

    The pipeline is called once with both inputs, so the image and its mask go
    through the same call.

    Args:
        image (np.ndarray): The input image.
        mask (np.ndarray): The corresponding mask.
        augmentations: Callable taking `image=` and `mask=` keyword arguments
            and returning a mapping with "image" and "mask" entries
            (e.g. an Albumentations pipeline).

    Returns:
        Tuple[np.ndarray, np.ndarray]: Augmented image and mask.
    """
    augmented = augmentations(image=image, mask=mask)
    new_image = augmented["image"]
    new_mask = augmented["mask"]
    return new_image, new_mask

  check_for_updates()


In [None]:
def augment_dataset(X_train, y_train, augmentations):
    """
    Augment the dataset with horizontally, vertically and doubly flipped copies.

    For every input pair the output contains, in this order: the original, the
    horizontal flip, the vertical flip, and the horizontal + vertical flip.

    NOTE(review): `augmentations` is accepted for interface compatibility but is
    not applied here; only the fixed flips are. Confirm whether the caller's
    pipeline was meant to be applied as well.

    Args:
        X_train (np.ndarray): Array of input images of shape (N, 64, 128).
        y_train (np.ndarray): Array of masks of shape (N, 64, 128).
        augmentations: Albumentations augmentation pipeline (currently unused).

    Returns:
        Tuple[np.ndarray, np.ndarray]: Augmented image and mask arrays.
    """
    # The flip pipelines are identical for every sample, so build them once
    # instead of constructing new Compose objects for each image.
    horizontal_flip = A.Compose([A.HorizontalFlip(p=1)])
    vertical_flip = A.Compose([A.VerticalFlip(p=1)])

    augmented_images = []
    augmented_masks = []

    for img, mask in zip(X_train, y_train):
        img_hf, mask_hf = augment_image_and_mask(img, mask, horizontal_flip)
        img_vf, mask_vf = augment_image_and_mask(img, mask, vertical_flip)
        # Vertical flip of the horizontally flipped pair gives the double flip
        img_hvf, mask_hvf = augment_image_and_mask(img_hf, mask_hf, vertical_flip)

        augmented_images.extend((img, img_hf, img_vf, img_hvf))
        augmented_masks.extend((mask, mask_hf, mask_vf, mask_hvf))

    # Convert lists to arrays
    return np.array(augmented_images), np.array(augmented_masks)


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix

def compute_segmentation_confusion_matrix(model, X_val, y_val, num_classes):
    """
    Computes a pixel-level confusion matrix for image segmentation predictions.

    Each image is predicted on its own; the per-image confusion matrices are
    summed into one overall matrix.

    Parameters:
        model (tf.keras.Model): The trained segmentation model.
        X_val (numpy.ndarray): Validation images of shape (N, H, W, C).
        y_val (numpy.ndarray): Ground truth masks of shape (N, H, W).
        num_classes (int): The number of segmentation classes.

    Returns:
        numpy.ndarray: Confusion matrix of shape (num_classes, num_classes),
        rows indexed by true class and columns by predicted class.
    """
    overall_confusion_matrix = np.zeros((num_classes, num_classes), dtype=np.int64)
    # Fixed label list so every per-image matrix has the full shape, even when
    # an image does not contain all classes
    class_labels = np.arange(num_classes)

    for i in range(len(X_val)):
        # verbose=0: one predict call per image would otherwise print one
        # progress bar per sample
        prediction = model.predict(X_val[i:i+1], verbose=0)
        prediction = np.argmax(prediction[0], axis=-1)  # per-pixel class index

        pred_flat = prediction.flatten()
        true_flat = y_val[i].flatten()

        overall_confusion_matrix += confusion_matrix(true_flat, pred_flat, labels=class_labels)

    return overall_confusion_matrix


## Training Functions

In [None]:
import keras
import tensorflow as tf
from keras.metrics import MeanIoU
from keras.saving import register_keras_serializable

@keras.saving.register_keras_serializable()
class MeanIntersectionOverUnion(tf.keras.metrics.MeanIoU):
    """
    Mean IoU metric that drops pixels of selected ground-truth labels first.

    Predictions are expected with a trailing class axis; they are reduced with
    argmax before being handed to the parent metric. Pixels whose true label is
    listed in `labels_to_exclude` (default: [0]) are removed from both tensors.
    `ignore_class` is stored on the instance and written to the config; it is
    not passed to the parent constructor.
    """

    def __init__(self, num_classes, labels_to_exclude=None, ignore_class=None, name="mean_iou", dtype=None, **kwargs):
        # Only the arguments the parent knows about are forwarded
        super().__init__(num_classes=num_classes, name=name, dtype=dtype, **kwargs)

        # Without an explicit list, label 0 is the one left out
        self.labels_to_exclude = [0] if labels_to_exclude is None else labels_to_exclude
        self.ignore_class = ignore_class

    def update_state(self, y_true, y_pred, sample_weight=None):
        # One class index per pixel, then both tensors as flat vectors
        flat_pred = tf.reshape(tf.math.argmax(y_pred, axis=-1), [-1])
        flat_true = tf.reshape(y_true, [-1])

        # Filter out, label by label, every pixel whose truth is excluded
        for excluded in self.labels_to_exclude:
            keep = tf.not_equal(flat_true, excluded)
            flat_true = tf.boolean_mask(flat_true, keep)
            flat_pred = tf.boolean_mask(flat_pred, keep)

        return super().update_state(flat_true, flat_pred, sample_weight)

    def get_config(self):
        # Parent config plus the two arguments this subclass adds
        config = super().get_config()
        config["labels_to_exclude"] = self.labels_to_exclude
        config["ignore_class"] = self.ignore_class
        return config

    @classmethod
    def from_config(cls, config):
        # The config keys map one-to-one onto constructor arguments
        return cls(**config)


In [None]:
def load_single_image(image_path, label_path, input_size=(64, 128)):
    """
    Load a single image-label pair with the correct shape.

    Args:
        image_path: Path of the PNG image file.
        label_path: Path of the PNG label-mask file.
        input_size (tuple): Target (height, width) for both outputs.

    Returns:
        tuple: (image, label). The image is float32 scaled by 1/255 with one
        channel; the label is int32 with one channel.
    """
    print(f"Loading image from {image_path}")
    print(f"Loading label from {label_path}")
    # Read and preprocess the image
    image = tf.io.read_file(image_path)
    image = tf.io.decode_png(image, channels=1)  # Ensure 1 channel, greyscale
    image = tf.image.resize(image, input_size)   # Resize to fixed size
    image = tf.cast(image, tf.float32) / 255.0

    # Read and preprocess the label
    label = tf.io.read_file(label_path)
    label = tf.io.decode_png(label, channels=1)  # Ensure single channel
    # Class ids are categorical: nearest-neighbour resizing keeps them intact,
    # whereas bilinear interpolation would blend neighbouring ids into values
    # that do not correspond to any class.
    label = tf.image.resize(label, input_size, method='nearest')
    label = tf.cast(label, tf.int32)

    return image, label

In [None]:
# Visualization callback
class VizCallback(tf.keras.callbacks.Callback):
    """
    Keras callback that plots one sample's input, ground truth and prediction.

    Args:
        image: Single input image (without batch dimension).
        label: Ground-truth mask for `image`.
        frequency (int): Plot at epochs whose index is a multiple of this value.
        num_classes (int): Number of classes used to build the colormap
            (default 5, the value previously hard-coded).
    """

    def __init__(self, image, label, frequency=5, num_classes=5):
        super().__init__()
        self.image = image
        self.label = label
        self.frequency = frequency
        self.num_classes = num_classes

    def on_epoch_end(self, epoch, logs=None):
        # Visualize only every "frequency" epochs
        if epoch % self.frequency != 0:
            return

        # The model expects a batch, so add a leading batch axis
        image = tf.expand_dims(self.image, 0)
        pred = self.model.predict(image, verbose=0)
        y_pred = tf.math.argmax(pred, axis=-1).numpy()

        # One shared colormap so truth and prediction use the same colours
        colormap = create_segmentation_colormap(self.num_classes)

        plt.figure(figsize=(16, 4))

        # Input image
        plt.subplot(1, 3, 1)
        plt.imshow(image[0], cmap='gray')
        plt.title("Input Image")
        plt.axis('off')

        # Ground truth
        plt.subplot(1, 3, 2)
        plt.imshow(apply_colormap(self.label, colormap))
        plt.title("Ground Truth Mask")
        plt.axis('off')

        # Prediction
        plt.subplot(1, 3, 3)
        plt.imshow(apply_colormap(y_pred, colormap))
        plt.title("Predicted Mask")
        plt.axis('off')

        plt.tight_layout()
        plt.show()
        plt.close()

In [None]:
#Creating a colour map
def create_segmentation_colormap(num_classes):
    """
    Build a colour table by sampling the 'viridis' palette.

    The palette is evaluated at `num_classes` evenly spaced positions between
    0 and 1, giving one colour per class index.
    """
    sample_points = np.linspace(0, 1, num_classes)
    return plt.cm.viridis(sample_points)

def apply_colormap(label, colormap=None):
    """
    Apply the colormap to a label.

    Args:
        label: Array of class indices; singleton dimensions are squeezed away.
        colormap: Array indexed by class id along its first axis. When omitted,
            one is created with enough entries for the largest id in `label`.

    Returns:
        numpy.ndarray: `colormap` rows looked up for every pixel of `label`.
    """
    # Ensure label has no singleton axes (e.g. batch or channel of size 1)
    label = np.squeeze(label)

    if colormap is None:
        # Size by the largest id, not by the number of distinct ids: a mask
        # containing only classes {0, 4} still needs five colormap entries.
        num_classes = int(label.max()) + 1 if label.size else 0
        colormap = create_segmentation_colormap(num_classes)

    # Integer indexing picks one colormap row per pixel
    return colormap[label.astype(int)]

## ⏳ Load the Data

In [None]:
# Open the archive in a context manager so the underlying file is closed once
# both arrays have been read.
with np.load("mars_for_students.npz") as data:
    training_set = data["training_set"]
    X_test = data["test_set"]

# Along axis 1 the training set holds the image at position 0 and its mask at position 1
X_train = training_set[:, 0]
y_train = training_set[:, 1]

print(f"Training X shape: {X_train.shape}")
print(f"Training y shape: {y_train.shape}")
print(f"Test X shape: {X_test.shape}")

Training X shape: (2615, 64, 128)
Training y shape: (2615, 64, 128)
Test X shape: (10022, 64, 128)


In [None]:
# Find groups of samples that share an identical mask
duplicates = find_duplicate_masks(y_train)

# After manual inspection, the first alien image appears at index 62.
# Build a new list instead of appending to duplicates[62], so the duplicate
# map itself is left untouched.
alien_image_indices = [*duplicates[62], 62]
alien_index_set = set(alien_image_indices)  # constant-time membership tests

# Dictionary of all masks, and the same dictionary without the alien ones
input_dict = dict(enumerate(y_train))
no_aliens_map = remove_alien_elements(input_dict, alien_index_set)

remaining_indices = [i for i in range(len(y_train)) if i not in alien_index_set]

X_train = X_train[remaining_indices]
y_train = y_train[remaining_indices]

print(f"Total masks before removal: {len(input_dict)}")
print(f"Total masks after removal: {len(no_aliens_map)}")

Total masks before removal: 2615
Total masks after removal: 2505


In [None]:
# Adding flipped versions of images
X_train, y_train = flip_dataset(X_train, y_train)

In [None]:
#Split a validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=seed)

In [None]:
# Albumentations pipeline: shift/scale/rotate with constant zero borders
# (applied with probability 0.5), then with probability 0.5 one of three blur
# transforms, then a resize to 64x128.
augmentations = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.5625, scale_limit=0.1, rotate_limit=15, border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0, p=0.5),
    A.OneOf([
        A.MotionBlur(blur_limit=7, p=0.4),
        A.MedianBlur(blur_limit=7, p=0.4),
        A.Blur(blur_limit=7, p=0.4)
    ], p=0.5),
    A.Resize(64, 128, always_apply=True),  # Ensures all outputs are the correct shape
])

# Expand the training set with augment_dataset.
# NOTE(review): augment_dataset, as defined earlier in this notebook, never
# calls the pipeline it receives and only adds flipped copies. Confirm whether
# the transforms above were meant to be applied.
X_train, y_train = augment_dataset(X_train, y_train, augmentations)

In [None]:
# Percentage of training pixels per class (default of 5 classes)
class_percentages = compute_class_distribution(y_train)

# Print one line per class id
for i, percentage in enumerate(class_percentages):
    print(f"Class {i}: {percentage:.2f}%")

Class 0: 24.36%
Class 1: 33.98%
Class 2: 23.33%
Class 3: 18.21%
Class 4: 0.13%


## 🛠️ Train and Save the Model

In [None]:
# Add a trailing channel axis and divide pixel values by 255
X_train = X_train[..., np.newaxis] / 255.0
X_test = X_test[..., np.newaxis] / 255.0
X_val = X_val[..., np.newaxis] / 255.0

# Per-sample input shape (batch axis dropped) and number of distinct labels
# present in the training masks
input_shape = X_train.shape[1:]
num_classes = len(np.unique(y_train))

#

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

Input shape: (64, 128, 1)
Number of classes: 5


In [None]:
# Training hyperparameters

batch_size = 64        # mini-batch size
num_epochs = 120       # maximum number of training epochs
learning_rate = 0.001  # learning rate passed to the optimizer
patience = 15          # early-stopping patience, in epochs

### Define Model

In [None]:
def res_conv_block(x, filter_size, size):
    '''
    Residual convolutional block.

    Main path:  conv - BN - ReLU - conv - BN
    Shortcut:   1x1 conv on the block input (no batch normalization here)
    Output:     ReLU(shortcut + main path)

    The activation is applied after the addition, as in the original ResNet
    block. The alternative (activation before the addition) is left commented
    out in the body.

    Check fig 4 in https://arxiv.org/ftp/arxiv/papers/1802/1802.06955.pdf

    Args:
        x: Input tensor (channels-last; batch normalization uses axis 3).
        filter_size: Side length of the square convolution kernel.
        size: Number of output filters of every convolution in the block.

    Returns:
        Output tensor of the residual block.
    '''

    # First conv - BN - ReLU stage
    conv = layers.Conv2D(size, (filter_size, filter_size), padding='same')(x)
    conv = layers.BatchNormalization(axis=3)(conv)
    conv = layers.Activation('relu')(conv)

    # Second conv - BN stage; its activation is deferred until after the addition
    conv = layers.Conv2D(size, (filter_size, filter_size), padding='same')(conv)
    conv = layers.BatchNormalization(axis=3)(conv)
    #conv = layers.Activation('relu')(conv)    #Activation before addition with shortcut

    # 1x1 conv brings the input to `size` channels so it can be added to the main path
    shortcut = layers.Conv2D(size, kernel_size=(1, 1), padding='same')(x)
    res_path = layers.add([shortcut, conv])
    res_path = layers.Activation('relu')(res_path)    #Activation after addition with shortcut (Original residual block)
    return res_path

In [None]:
def attention_block(x, gating, inter_shape):
    """
    Attention gate: re-weights the skip feature map `x` using `gating`.

    A single-channel sigmoid map is computed from the sum of a strided
    projection of `x` and a projected, upsampled `gating`. The map is upsampled
    back to the spatial size of `x`, repeated across its channels and
    multiplied with `x`. The result goes through a 1x1 convolution and batch
    normalization.

    Args:
        x: Skip-connection feature map (channels-last, static shape required).
        gating: Gating feature map (channels-last, static shape required).
        inter_shape: Number of filters of the intermediate projections.

    Returns:
        Tensor with the same number of channels as `x`.
    """
    shape_x = K.int_shape(x)
    shape_g = K.int_shape(gating)

    # Project x with a 2x2, stride-2 convolution (reduces its spatial size)
    theta_x = layers.Conv2D(inter_shape, (2, 2), strides=(2, 2), padding='same')(x)  # 16
    shape_theta_x = K.int_shape(theta_x)

    # Project the gating signal to inter_shape filters, then upsample it by the
    # integer ratio between theta_x's and gating's spatial sizes
    phi_g = layers.Conv2D(inter_shape, (1, 1), padding='same')(gating)
    upsample_g = layers.Conv2DTranspose(inter_shape, (3, 3),
                                 strides=(shape_theta_x[1] // shape_g[1], shape_theta_x[2] // shape_g[2]),
                                 padding='same')(phi_g)  # 16

    # Element-wise sum (despite the "concat" name), ReLU, then a 1x1 conv to a
    # single channel squashed into (0, 1) by a sigmoid
    concat_xg = layers.add([upsample_g, theta_x])
    act_xg = layers.Activation('relu')(concat_xg)
    psi = layers.Conv2D(1, (1, 1), padding='same')(act_xg)
    sigmoid_xg = layers.Activation('sigmoid')(psi)
    shape_sigmoid = K.int_shape(sigmoid_xg)
    # Bring the attention map back to the spatial size of x
    upsample_psi = layers.UpSampling2D(size=(shape_x[1] // shape_sigmoid[1], shape_x[2] // shape_sigmoid[2]))(sigmoid_xg)  # 32

    # Copy the single-channel map across all channels of x
    upsample_psi = repeat_elem(upsample_psi, shape_x[3])

    # Gate the skip features
    y = layers.multiply([upsample_psi, x])

    result = layers.Conv2D(shape_x[3], (1, 1), padding='same')(y)
    result_bn = layers.BatchNormalization()(result)
    return result_bn

In [None]:
def gating_signal(input, out_size):
    """
    Build the gating feature map from a deeper-layer feature map.

    A 1x1 convolution sets the channel count to `out_size`; batch normalization
    and a ReLU follow.
    :return: the gating feature map with `out_size` channels
    """
    projected = layers.Conv2D(out_size, (1, 1), padding='same')(input)
    normalised = layers.BatchNormalization()(projected)
    return layers.Activation('relu')(normalised)


In [None]:
def senet_block(input, filters, kernel_size=3, activation='relu', stack=2, name=''):
    """
    Stack of convolution stages, each followed by squeeze-and-excitation.

    Every stage is conv - BN - activation, then a channel re-weighting computed
    from the globally averaged features. Stages are chained: stage s consumes
    the output of stage s - 1. (Previously the function returned from inside
    the loop, so only one stage was ever built regardless of `stack`.)

    Args:
        input: Input tensor (channels-last).
        filters (int): Number of convolution filters per stage.
        kernel_size (int): Convolution kernel size.
        activation (str): Activation used after BN and in the first SE dense layer.
        stack (int): Number of stages to chain.
        name (str): Prefix for the layer names.

    Returns:
        Output tensor of the last stage (the input itself when stack is 0).
    """
    x = input
    for s in range(stack):
        # Main convolutional part
        x = tfkl.Conv2D(filters, kernel_size, padding='same', use_bias=False, name=f'{name}_conv_{s}')(x)
        x = tfkl.BatchNormalization(name=f'{name}_bn1_{s}')(x)
        x = tfkl.Activation(activation, name=f'{name}_activation_{s}')(x)

        # Squeeze-and-excitation module
        channels = x.shape[-1]
        # Squeeze step: one average value per channel
        se = tfkl.GlobalAveragePooling2D(name=f'{name}_squeeze_{s}')(x)
        # Excitation step: bottleneck of channels / 16 units, at least one so
        # that small filter counts do not yield an empty dense layer
        se = tfkl.Dense(max(1, channels // 16), activation=activation, name=f'{name}_dense1_{s}')(se)
        se = tfkl.Dense(channels, activation='sigmoid', name=f'{name}_dense2_{s}')(se)

        # Scaling of the output with SE activation (broadcast over height and width)
        se = tfkl.Reshape((1, 1, channels))(se)
        x = tfkl.Multiply(name=f'{name}_scale_{s}')([x, se])

    return x

In [None]:
def repeat_elem(tensor, rep):
    """Repeat the elements of `tensor` `rep` times along axis 3, wrapped in a Lambda layer."""
    repeat_layer = layers.Lambda(
        lambda x, repnum: K.repeat_elements(x, repnum, axis=3),
        arguments={'repnum': rep},
    )
    return repeat_layer(tensor)

In [None]:
def Attention_ResUNet(input_shape=(64, 128, 1), NUM_CLASSES=5):
    '''
    Residual U-Net with attention-gated skip connections.

    Encoder: four residual blocks, each followed by 2x2 max pooling, with the
    filter count doubling at every level. Bottleneck: one senet_block.
    Decoder: four stages, each gating the matching encoder output with an
    attention block, upsampling the decoder features by 2, concatenating both
    and applying a residual block. Head: 1x1 convolution to NUM_CLASSES
    channels, batch normalization and softmax.

    Args:
        input_shape: Shape of one input sample (height, width, channels).
        NUM_CLASSES: Number of output channels of the final softmax.

    Returns:
        Keras Model named "AttentionResUNet".
    '''
    # network structure
    FILTER_NUM = 64 # number of basic filters for the first layer
    FILTER_SIZE = 3 # size of the convolutional filter
    UP_SAMP_SIZE = 2 # size of upsampling filters
    # input data
    inputs = layers.Input(input_shape, dtype=tf.float32)
    axis = 3  # channel axis (channels-last), used for concatenation and the final BN

    # Downsampling layers
    # DownRes 1, residual convolution block + pooling
    conv_128 = res_conv_block(inputs, FILTER_SIZE, FILTER_NUM)
    pool_64 = layers.MaxPooling2D(pool_size=(2,2))(conv_128)
    # DownRes 2

    conv_64 = res_conv_block(pool_64, FILTER_SIZE, 2*FILTER_NUM)
    pool_32 = layers.MaxPooling2D(pool_size=(2,2))(conv_64)
    # DownRes 3

    conv_32 = res_conv_block(pool_32, FILTER_SIZE, 4*FILTER_NUM)
    pool_16 = layers.MaxPooling2D(pool_size=(2,2))(conv_32)
    # DownRes 4

    conv_16 = res_conv_block(pool_16, FILTER_SIZE, 8*FILTER_NUM)
    pool_8 = layers.MaxPooling2D(pool_size=(2,2))(conv_16)
    # Bottleneck: squeeze-and-excitation convolution block, no pooling

    conv_8 = senet_block(pool_8, 16*FILTER_NUM, name='bottleneck')

    # Upsampling layers

    # UpRes 6, attention gated concatenation + upsampling + residual convolution block
    gating_16 = gating_signal(conv_8, 8*FILTER_NUM)
    att_16 = attention_block(conv_16, gating_16, 8*FILTER_NUM)
    up_16 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(conv_8)
    up_16 = layers.concatenate([up_16, att_16], axis=axis)
    up_conv_16 = res_conv_block(up_16, FILTER_SIZE, 8*FILTER_NUM)
    # UpRes 7
    gating_32 = gating_signal(up_conv_16, 4*FILTER_NUM)
    att_32 = attention_block(conv_32, gating_32, 4*FILTER_NUM)
    up_32 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_16)
    up_32 = layers.concatenate([up_32, att_32], axis=axis)
    up_conv_32 = res_conv_block(up_32, FILTER_SIZE, 4*FILTER_NUM)
    # UpRes 8
    gating_64 = gating_signal(up_conv_32, 2*FILTER_NUM)
    att_64 = attention_block(conv_64, gating_64, 2*FILTER_NUM)
    up_64 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_32)
    up_64 = layers.concatenate([up_64, att_64], axis=axis)
    up_conv_64 = res_conv_block(up_64, FILTER_SIZE, 2*FILTER_NUM)
    # UpRes 9
    gating_128 = gating_signal(up_conv_64, FILTER_NUM)
    att_128 = attention_block(conv_128, gating_128, FILTER_NUM)
    up_128 = layers.UpSampling2D(size=(UP_SAMP_SIZE, UP_SAMP_SIZE), data_format="channels_last")(up_conv_64)
    up_128 = layers.concatenate([up_128, att_128], axis=axis)
    up_conv_128 = res_conv_block(up_128, FILTER_SIZE, FILTER_NUM)

    # Classification head: 1x1 convolution to one channel per class,
    # batch normalization, then a per-pixel softmax

    conv_final = layers.Conv2D(NUM_CLASSES, kernel_size=(1,1))(up_conv_128)
    conv_final = layers.BatchNormalization(axis=axis)(conv_final)
    conv_final = layers.Activation('softmax')(conv_final)

    # Model integration
    model = models.Model(inputs, conv_final, name="AttentionResUNet")
    return model

### Building model

In [None]:
# Instantiate the network for the input shape and class count derived from the data
model = Attention_ResUNet(input_shape=input_shape, NUM_CLASSES=num_classes)

# Print a detailed summary of the model with expanded nested layers and trainable parameters.
model.summary(expand_nested=True, show_trainable=True)

### Compile Model

In [None]:
# Compile the model: sparse categorical cross-entropy loss (integer class
# labels), AdamW optimizer, and two metrics - pixel accuracy and a mean IoU
# that leaves out label 0.
print("Compiling model...")
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.AdamW(learning_rate),
    metrics=["accuracy", MeanIntersectionOverUnion(num_classes=num_classes, labels_to_exclude=[0])]
)

print("Model compiled!")

Compiling model...
Model compiled!


### Setup Callbacks

In [None]:
# Both callbacks watch the validation mean IoU (the metric reported after
# training) rather than the training accuracy. Training accuracy keeps rising
# while the model overfits, so it would never trigger early stopping at a
# useful point, and restore_best_weights would pick the most overfitted epoch.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor="val_mean_iou",
    mode='max',
    patience=patience,
    restore_best_weights=True
)

# Cut the learning rate by 10x after 8 epochs without improvement, down to 1e-4 at most
plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_mean_iou",
    mode='max',
    factor=0.1,
    patience=8,
    min_lr=1e-4
)

# Plot predictions for one fixed validation sample during training
viz_callback = VizCallback(X_val[10], y_val[10])

### Train the Model

In [None]:
# Train the model; batch_size is passed explicitly so the constant defined
# above is actually used instead of the Keras default
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=num_epochs,
    callbacks=[earlystop, viz_callback, plateau],
    verbose=1,
    validation_data=(X_val, y_val)
).history

# Report the best validation mean IoU reached during training
final_val_meanIoU = round(max(history['val_mean_iou'])* 100, 2)
print(f'Final validation Mean Intersection Over Union: {final_val_meanIoU}%')

# Save the trained model under a timestamped filename
timestep_str = datetime.now().strftime("%y%m%d_%H%M%S")
model_filename = f"exploration_model_{timestep_str}.keras"
model.save(model_filename)

Epoch 1/120
[1m 755/1128[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m58:57[0m 9s/step - accuracy: 0.5825 - loss: 1.1323 - mean_iou: 0.3090

## 📊 Prepare Your Submission

In our Kaggle competition, submissions are made as `csv` files. To create a proper `csv` file, you need to flatten your predictions and include an `id` column as the first column of your dataframe. To maintain consistency between your results and our solution, please avoid shuffling the test set. The code below demonstrates how to prepare the `csv` file from your model predictions.




In [None]:
# If model_filename is not defined, fall back to the most recently modified
# saved model in the current directory
if "model_filename" not in globals() or model_filename is None:
    # The training cell saves "exploration_model_<timestamp>.keras"; the plain
    # "model_" prefix is still accepted for files saved under that name
    files = [
        f for f in os.listdir('.')
        if os.path.isfile(f)
        and f.startswith(('model_', 'exploration_model_'))
        and f.endswith('.keras')
    ]
    if not files:
        raise FileNotFoundError("No model files found in the current directory.")
    files.sort(key=os.path.getmtime, reverse=True)
    model_filename = files[0]

In [None]:
#model = tfk.models.load_model(model_filename, safe_mode=False)
#print(f"Model loaded from {model_filename}")

In [None]:
## Define your model architecture again (without the weights)
#model = Attention_ResUNet(input_shape=input_shape, NUM_CLASSES=num_classes)
#
## Load the weights
#model.load_weights(model_filename)
#
#print(f"Model weights loaded from {model_filename}")

In [None]:
# Predict per-pixel class probabilities for the test set, then keep the index
# of the most likely class for every pixel
preds = model.predict(X_test)
preds = np.argmax(preds, axis=-1)
print(f"Predictions shape: {preds.shape}")
# Drop the reference to the model now that the predictions are available
del model

In [None]:
def y_to_df(y) -> pd.DataFrame:
    """Flatten each prediction into one row and prepend an `id` column (0..n-1) for Kaggle."""
    n_samples = len(y)
    frame = pd.DataFrame(y.reshape(n_samples, -1))
    # Put the running sample index in front of the pixel columns
    frame.insert(0, "id", np.arange(n_samples))
    return frame

In [None]:
# Create and download the csv submission file.
# The submission name reuses the model filename with "model_" and the
# ".keras" extension removed.
timestep_str = model_filename.replace("model_", "").replace(".keras", "")
submission_filename = f"submission_{timestep_str}.csv"
submission_df = y_to_df(preds)
submission_df.to_csv(submission_filename, index=False)

# Colab helper that triggers a browser download; note that this import
# rebinds the module-level name `files`
from google.colab import files
files.download(submission_filename)

#  
<img src="https://airlab.deib.polimi.it/wp-content/uploads/2019/07/airlab-logo-new_cropped.png" width="350">

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/Instagram_logo_2022.svg/800px-Instagram_logo_2022.svg.png" width="15"> **Instagram:** https://www.instagram.com/airlab_polimi/

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/LinkedIn_icon.svg/2048px-LinkedIn_icon.svg.png" width="15"> **LinkedIn:** https://www.linkedin.com/company/airlab-polimi/
___
Credits: Alberto Archetti 📧 alberto.archetti@polito.it





```
   Copyright 2024 Alberto Archetti

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
```