## 🌐 Connect Colab to Google Drive

In [1]:
from google.colab import drive

drive.mount("/gdrive")
%cd /gdrive/My Drive
%cd [2024-2025] AN2DL/Homework 2

Mounted at /gdrive
/gdrive/My Drive
/gdrive/My Drive/[2024-2025] AN2DL/Homework 2


## ⚙️ Import Libraries

In [None]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import logging
import random

import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import mode

import matplotlib.pyplot as plt
%matplotlib inline

seed = 29
np.random.seed(seed)
tf.random.set_seed(seed)

# Set seeds for random number generators in NumPy and Python
np.random.seed(seed)
random.seed(seed)

# Set seed for TensorFlow
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

## ⏳ Load the Datasets

In [3]:
data = np.load("mars_for_students.npz")

training_set = data["training_set"]
X_train = training_set[:, 0]
y_train = training_set[:, 1]

X_test = data["test_set"]

print(f"Training X shape: {X_train.shape}")
print(f"Training y shape: {y_train.shape}")
print(f"Test X shape: {X_test.shape}")

# Add color channel and rescale pixels between 0 and 1
X_train = X_train[..., np.newaxis] / 255.0
X_test = X_test[..., np.newaxis] / 255.0

input_shape = X_train.shape[1:]
num_classes = len(np.unique(y_train))

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

Training X shape: (2615, 64, 128)
Training y shape: (2615, 64, 128)
Test X shape: (10022, 64, 128)
Input shape: (64, 128, 1)
Number of classes: 5


## 🔍 Inspect the training dataset

In [None]:
# Extract all the dominant labels
y_train_labels = mode(y_train, axis=(1, 2))[0].flatten()
unique_labels = np.unique(y_train)

# Plot images in batches
def plot_images(X, y, start_index=0, img_row=10, img_col=10):
    fig, axes = plt.subplots(img_col, img_row, figsize=(15, 15))
    for i in range(img_row * img_col):
        idx = start_index + i
        if idx >= len(X):
            break
        ax = axes[i // img_row, i % img_row]
        ax.imshow(X[idx], cmap="gray")
        ax.set_title(f"Class: {y[idx]}")
        ax.axis("off")
    plt.tight_layout()
    plt.show()

# Plot just one image from each class
def plot_one(X, y, y_mask, classes):
    for label in classes:
        for i in range(len(y_mask)):
            if label in np.unique(y_mask[i]):
                plt.figure()
                plt.imshow(X[i], cmap="gray")
                plt.title(f"Class: {label}")
                plt.axis("off")
                plt.show()
                break

plot_one(X_train, y_train_labels, y_train, unique_labels)

# Plot all the images in batches
img_row = 10
img_col = 10
img_page = img_row * img_col
num_images = X_train.shape[0]

for start_idx in range(0, num_images, img_page):
    plot_images(X_train, y_train_labels, start_index=start_idx, img_row=img_row, img_col=img_col)

## ❌ Remove outliers from dataset

In [5]:
# Lists to contain filtered elements
X_train_filtered = []
y_train_filtered = []

for i in range(len(y_train)):
    label = y_train[i].argmax() if y_train.ndim > 1 else y_train[i]
    if label != 415:
        # Add to filtered dataset the non-alien images
        X_train_filtered.append(X_train[i])
        y_train_filtered.append(y_train[i])

# Convert lists to numpy arrays
X_train_filtered = np.array(X_train_filtered)
y_train_filtered = np.array(y_train_filtered)

print(f"Shape X_train_filtered: {X_train_filtered.shape}")
print(f"Shape y_train_filtered: {y_train_filtered.shape}")
print(f"Unique classes: {np.unique(y_train_filtered)}")

Shape X_train_filtered: (2505, 64, 128, 1)
Shape y_train_filtered: (2505, 64, 128)
Unique classes: [0. 1. 2. 3. 4.]


## ✂ Split into Training and Validation Sets

In [7]:
# Split the training dataset to get a validation set
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=seed)

# Print the shapes of the resulting sets
print('Training set shape:\t', X_train.shape, y_train.shape)
print('Validation set shape:\t', X_val.shape, y_val.shape)

Training set shape:	 (2353, 64, 128, 1) (2353, 64, 128)
Validation set shape:	 (262, 64, 128, 1) (262, 64, 128)


## 🧮 Define network parameters

In [None]:
# Set batch size for training
batch_size = 64

# Set learning rate for the optimizer
learning_rate = 1e-5

# Set early stopping patience threshold
patience = 10

# Set maximum number of training epochs
epochs = 100

# Flat y_train to compute weights
y_train_flat = y_train.flatten()

# Compute class weights
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_flat),
    y=y_train_flat
)

# Convert weights to dictionary, set background to 0
class_weights = {i: weight for i, weight in enumerate(class_weights_array)}
class_weights[0] = 0.0

print("Class Weights:", class_weights)

Class Weights: {0: 0.0, 1: 0.6075654375216993, 2: 0.8680682195342838, 3: 1.0852267596292755, 4: 151.75983938904855}


In [None]:
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_mean_iou',
    patience=10,
    restore_best_weights=True,
    mode='max'
)

lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_mean_iou",
    factor=0.5,
    patience=5,
    min_lr=1e-6
)

# Store the callback in a list
callbacks = [early_stopping, lr_scheduler]

## 🔄 Augmentation

In [10]:
def augment_data(image, label):
    # Geometric Transformations
    image = tf.image.random_flip_left_right(image)
    label = tf.image.random_flip_left_right(label)

    image = tf.image.random_flip_up_down(image)
    label = tf.image.random_flip_up_down(label)

    # Chromatic Transformations
    image = tf.image.random_brightness(image, max_delta=0.4)
    image = tf.image.random_contrast(image, lower=0.7, upper=1.3)

    return image, label

def preprocess_image(image):
    image = tf.expand_dims(image, axis=-1) if len(image.shape) == 2 else image
    image = tf.cast(image, tf.float32)
    return image

def preprocess_label(label):
    label = tf.expand_dims(label, axis=-1) if len(label.shape) == 2 else label
    label = tf.cast(label, tf.int32)
    return label

def preprocess_data(image, label):
    image = preprocess_image(image)
    label = preprocess_label(label)
    return image, label

In [None]:
# Original dataset
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
train_dataset = train_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Augmented dataset
augmented_dataset = train_dataset.map(lambda x, y: augment_data(x, y), num_parallel_calls=tf.data.AUTOTUNE)

# Combined dataset, having both augmented and original dataset
combined_dataset = train_dataset.concatenate(augmented_dataset)
combined_dataset = combined_dataset.shuffle(buffer_size=len(X_train))
combined_dataset = combined_dataset.prefetch(tf.data.AUTOTUNE)

# Validation dataset
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

## 🔨 Build the model

In [None]:
def res_unet_block(input_tensor, filters, name):
    x = tfkl.Conv2D(filters, (3, 3), padding="same", kernel_initializer="he_normal", name=f"{name}_conv1")(input_tensor)
    x = tfkl.BatchNormalization(name=f"{name}_bn1")(x)
    x = tfkl.Activation("relu", name=f"{name}_act1")(x)
    x = tfkl.Conv2D(filters, (3, 3), padding="same", kernel_initializer="he_normal", name=f"{name}_conv2")(x)
    x = tfkl.BatchNormalization(name=f"{name}_bn2")(x)

    # Skip connection
    shortcut = tfkl.Conv2D(filters, (1, 1), padding="same", name=f"{name}_shortcut")(input_tensor)
    x = tfkl.Add(name=f"{name}_add")([x, shortcut])
    x = tfkl.Activation("relu", name=f"{name}_act2")(x)
    return x

In [None]:
# Define the input layer
input_layer = tfkl.Input(shape=input_shape)

# Downsampling path with an extra layer
def downsampling_block(input_tensor, filters,):
    x = res_unet_block(input_tensor, filters)
    # Extra convolutional layer
    x = tfkl.Conv2D(filters, (3, 3), padding="same", kernel_initializer="he_normal")(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation("relu")(x)
    pool = tfkl.MaxPooling2D(pool_size=(2, 2))(x)
    return x, pool

down_block_1, d1 = downsampling_block(input_layer, 64)
down_block_2, d2 = downsampling_block(d1, 128)
down_block_3, d3 = downsampling_block(d2, 256)
down_block_4, d4 = downsampling_block(d3, 512)

# Bottleneck with multi-scale features and Squeeze-and-Excitation
def bottleneck_module(input_tensor, filters):
    # Base convolution
    x = tfkl.Conv2D(filters, kernel_size=3, padding='same', activation='relu')(input_tensor)

    # Squeeze-and-Excitation Block
    se = tfkl.GlobalAveragePooling2D()(x)
    se = tfkl.Dense(filters // 16, activation='relu')(se)
    se = tfkl.Dense(filters, activation='sigmoid')(se)
    se = tfkl.Reshape((1, 1, filters))(se)

    # Amplification of important features
    return tfkl.Multiply()([x, se])

bottleneck = bottleneck_module(d4, 1024)
bottleneck = tfkl.SpatialDropout2D(0.4)(bottleneck)

# Upsampling path with skip connections
def upsampling_block(input_tensor, skip_connection, filters):
    up = tfkl.Conv2DTranspose(filters, kernel_size=2, strides=2, padding="same")(input_tensor)
    skip_connection = tfkl.Dropout(0.2)(skip_connection)  # Dropout sulle connessioni skip
    concat = tfkl.Concatenate()([up, skip_connection])
    x = res_unet_block(concat, filters)
    # Extra convolutional layer
    x = tfkl.Conv2D(filters, (3, 3), padding="same", kernel_initializer="he_normal")(x)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation("relu")(x)
    return tfkl.SpatialDropout2D(0.2)(x)

# Upsampling path
u4 = upsampling_block(bottleneck, down_block_4, 512)
u3 = upsampling_block(u4, down_block_3, 256)
u2 = upsampling_block(u3, down_block_2, 128)
u1 = upsampling_block(u2, down_block_1, 64)

# Output layer with softmax activation for multi-class segmentation
output_layer = tfkl.Conv2D(num_classes, kernel_size=1, padding='same', activation="softmax")(u1)

# Define the model
model = tfk.Model(inputs=input_layer, outputs=output_layer)

def jaccard_loss(y_true, y_pred, smooth=1e-6):
    # Convert to one-hot encoding
    y_true = tf.squeeze(y_true, axis=-1)
    y_true = tf.one_hot(tf.cast(y_true, tf.int32), depth=y_pred.shape[-1])

    # Ignore class 0 (background)
    y_true = y_true[..., 1:]
    y_pred = y_pred[..., 1:]

    # Compute intersection and union
    intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2])
    union = tf.reduce_sum(y_true + y_pred, axis=[1, 2]) - intersection

    # Compute IoU
    iou = (intersection + smooth) / (union + smooth)

    # Jaccard Loss
    return 1 - tf.reduce_mean(iou)


# Define the MeanIoU ignoring the background class
mean_iou = tfk.metrics.MeanIoU(num_classes=num_classes, ignore_class=0, sparse_y_pred=False, name='mean_iou')

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-5),
    loss=jaccard_loss,
    metrics=[mean_iou]
)

## 🛠️ Train and Save the Model

In [26]:
history = model.fit(
    combined_dataset,
    epochs=epochs,
    validation_data=val_dataset,
    batch_size=batch_size,
    callbacks=callbacks
).history

Epoch 1/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 887ms/step - loss: 0.8993 - mean_iou: 0.1977 - val_loss: 0.9200 - val_mean_iou: 0.1331 - learning_rate: 0.0010
Epoch 2/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 590ms/step - loss: 0.8442 - mean_iou: 0.3347 - val_loss: 0.9163 - val_mean_iou: 0.1466 - learning_rate: 0.0010
Epoch 3/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 599ms/step - loss: 0.8347 - mean_iou: 0.4452 - val_loss: 0.9314 - val_mean_iou: 0.1196 - learning_rate: 0.0010
Epoch 4/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 608ms/step - loss: 0.8298 - mean_iou: 0.4551 - val_loss: 0.9144 - val_mean_iou: 0.1739 - learning_rate: 0.0010
Epoch 5/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 616ms/step - loss: 0.8298 - mean_iou: 0.4701 - val_loss: 0.8215 - val_mean_iou: 0.4799 - learning_rate: 0.0010
Epoch 6/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [27]:
timestep_str = datetime.now().strftime("%y%m%d_%H%M%S")
model_filename = f"model_{timestep_str}.keras"
model.save(model_filename)
del model

print(f"Model saved as {model_filename}")

Model saved as model_241211_000402.keras


## 📊 Test the model

In [None]:
model = tfk.models.load_model(
    model_filename,
    custom_objects={
        'jaccard_loss': jaccard_loss,
        'res_unet_block': res_unet_block
    }
)
print(f"Model loaded from {model_filename}")

Model loaded from model_241211_000402.keras


In [34]:
preds = model.predict(X_test)
preds = np.argmax(preds, axis=-1)
print(f"Predictions shape: {preds.shape}")

[1m314/314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 106ms/step
Predictions shape: (10022, 64, 128)


In [None]:
def calculate_iou_per_class(y_true, y_pred, num_classes):
    # Remove the channel dimension from y_true if present
    y_true = y_true.squeeze()  # or y_true = np.squeeze(y_true, axis=-1)

    iou_scores = []
    for i in range(num_classes):
        intersection = np.logical_and(y_true == i, y_pred == i).sum()
        union = np.logical_or(y_true == i, y_pred == i).sum()
        if union == 0:
            iou_scores.append(np.nan)
        else:
            iou_scores.append(intersection / union)
    return iou_scores
preds = model.predict(X_val)
preds = np.argmax(preds, axis=-1)
iou_scores = calculate_iou_per_class(y_val, preds, num_classes)
print("IoU score for each class (background excluded):", iou_scores[1:])

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 109ms/step
IoU score for each class (background excluded): [0.7006867530131458, 0.4988689751174526, 0.6321922079005073, 0.0]


## 💾 Save the predictions

In [35]:
def y_to_df(y) -> pd.DataFrame:
    """Converts segmentation predictions into a DataFrame format for Kaggle."""
    n_samples = len(y)
    y_flat = y.reshape(n_samples, -1)
    df = pd.DataFrame(y_flat)
    df["id"] = np.arange(n_samples)
    cols = ["id"] + [col for col in df.columns if col != "id"]
    return df[cols]

In [36]:
# Create and download the csv submission file
timestep_str = model_filename.replace("model_", "").replace(".keras", "")
submission_filename = f"submission_{timestep_str}.csv"
submission_df = y_to_df(preds)
submission_df.to_csv(submission_filename, index=False)

from google.colab import files
files.download(submission_filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>