<a href="https://www.kaggle.com/code/mahboobehabdighara/semanticsegmentation-techlabsproject-ipynb?scriptVersionId=223961624" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree lazily and print the full path of every file found.
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Load & Display Data

In [None]:
import os  #os: Handles file paths & directory access
import numpy as np
import cv2   # cv2:  (OpenCV) Loads and processes images
import matplotlib.pyplot as plt

# Set dataset path: directory whose files are listed and read by the loaders below.
# Each file is later cut at its horizontal midpoint into a left half (image)
# and a right half (segmentation mask).
DATASET_PATH = "/kaggle/input/cityscapes-image-pairs/cityscapes_data/train"

# Load an image
def load_and_display_image(idx=0, dataset_path=None):
    """Show one dataset file as a side-by-side image / mask figure.

    The file is read with OpenCV, converted from BGR to RGB (matplotlib expects
    RGB) and cut at its horizontal midpoint: the left half is shown as the
    original image and the right half as its segmentation mask.

    Args:
        idx: Position of the file in the alphabetically sorted directory
            listing. Sorting only makes the choice reproducible; image and mask
            always come from the same file.
        dataset_path: Directory to read from. When omitted, the module-level
            DATASET_PATH is used (looked up at call time).

    Raises:
        IndexError: If ``idx`` is outside the directory listing.
        ValueError: If OpenCV cannot decode the selected file.
    """
    if dataset_path is None:
        dataset_path = DATASET_PATH

    files = sorted(os.listdir(dataset_path))
    img_path = os.path.join(dataset_path, files[idx])

    # cv2.imread does not raise on failure; it returns None instead.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError(f"Could not read image file: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Split into left (original) and right (mask) at the middle column.
    half_width = img.shape[1] // 2
    img_left = img[:, :half_width, :]
    img_right = img[:, half_width:, :]

    # Show both halves next to each other, without axis ticks.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    ax1.imshow(img_left)
    ax1.set_title("Original Image")
    ax1.axis("off")

    ax2.imshow(img_right)
    ax2.set_title("Segmentation Mask")
    ax2.axis("off")

    plt.show()

# Display the file at index 5 of the sorted listing (i.e. the sixth file)
load_and_display_image(5)


## Preprocess Data; resize, normalize, and prepare the dataset for training.

In [None]:
# Target size handed to cv2.resize so that every image and mask ends up with
# the same dimensions before being stacked into one array.
IMAGE_SIZE = (256, 256)

def load_images(dataset_path, image_size):
    """Load every image pair in a directory as normalised image / mask arrays.

    Each file is read with OpenCV, converted to RGB and cut at its horizontal
    midpoint. The left half becomes the input image; the right half is
    converted to a single grayscale channel and used as the mask. Both are
    resized to ``image_size`` and scaled to the range [0, 1].

    Args:
        dataset_path: Directory containing the combined image/mask files.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, masks)`` of float32 NumPy arrays, one entry per
        readable file, in sorted filename order. ``images`` keeps three colour
        channels; ``masks`` has no channel axis.
    """
    images, masks = [], []

    for img_name in sorted(os.listdir(dataset_path)):
        img_path = os.path.join(dataset_path, img_name)

        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files; skip those instead of failing inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable file: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR

        # Split into left (image) and right (mask)
        half_width = img.shape[1] // 2
        img_left = cv2.resize(img[:, :half_width, :], image_size)
        img_right = cv2.resize(img[:, half_width:, :], image_size)

        # Scale to [0, 1]. float32 halves the memory of the default float64
        # result and is the precision Keras trains with anyway.
        img_left = img_left.astype(np.float32) / 255.0

        # Collapse the colour mask to one intensity channel. The result is a
        # continuous value in [0, 1], not a strict 0/1 label.
        # NOTE(review): the mask is already at image_size here, so the former
        # second resize was a no-op and has been dropped.
        img_right = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)
        img_right = img_right.astype(np.float32) / 255.0

        images.append(img_left)
        masks.append(img_right)

    # Stack the per-file arrays into two batch arrays for model training.
    return np.array(images), np.array(masks)

# Run the loader over the whole training directory: X holds the images, Y the masks.
X, Y = load_images(DATASET_PATH, IMAGE_SIZE)

# Append a trailing channel axis so the masks become 4-D like the images.
Y = np.expand_dims(Y, axis=-1)

# Report how many samples were loaded and the resulting array shapes.
loaded_count = len(X)
print(f"✅ Loaded {loaded_count} images and masks.")
print(f"📏 Image shape: {X.shape}, Mask shape: {Y.shape}")


### Split into Training & Validation

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation and keep 80% for training.
# The fixed random_state makes the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Report the array shapes of both partitions.
for subset_name, subset_images, subset_masks in (
    ("Training set", X_train, Y_train),
    ("Validation set", X_val, Y_val),
):
    print(f"🔹 {subset_name}: {subset_images.shape}, {subset_masks.shape}")


### Build a U-Net
🔹 Use Batch Normalization for stability
🔹 Use Dropout to reduce overfitting
🔹 More filters for better feature extraction

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
#tensorflow.keras.layers → Provides deep learning layers (Conv2D, MaxPooling2D, etc.).
#tensorflow.keras.models → Allows us to define and build a model.

def build_unet(input_shape=(256, 256, 3)):
    """Assemble a two-level U-Net with a one-channel sigmoid output.

    The encoder has two stages (64 and 128 filters), each made of
    conv -> batch norm -> conv followed by 2x2 max-pooling. A 256-filter
    bottleneck with 30% dropout sits in the middle. The decoder upsamples twice
    with strided transposed convolutions and concatenates the matching encoder
    features before each refining convolution.

    Args:
        input_shape: Shape of one input image, without the batch axis.

    Returns:
        An uncompiled Keras model.
    """

    def conv3x3(filters):
        # 3x3 ReLU convolution; "same" padding keeps the spatial size unchanged.
        return layers.Conv2D(filters, 3, activation="relu", padding="same")

    def upsample(filters):
        # Transposed convolution with stride 2 doubles the spatial size.
        return layers.Conv2DTranspose(
            filters, 3, strides=2, activation="relu", padding="same"
        )

    inputs = layers.Input(shape=input_shape)

    # Encoder stage 1 (64 filters); its output is reused as a skip connection.
    enc1 = conv3x3(64)(inputs)
    enc1 = layers.BatchNormalization()(enc1)
    enc1 = conv3x3(64)(enc1)
    down1 = layers.MaxPooling2D(pool_size=(2, 2))(enc1)

    # Encoder stage 2 (128 filters).
    enc2 = conv3x3(128)(down1)
    enc2 = layers.BatchNormalization()(enc2)
    enc2 = conv3x3(128)(enc2)
    down2 = layers.MaxPooling2D(pool_size=(2, 2))(enc2)

    # Bottleneck with dropout for regularisation.
    bottleneck = conv3x3(256)(down2)
    bottleneck = layers.Dropout(0.3)(bottleneck)

    # Decoder stage 2: upsample, join with encoder stage 2, refine.
    dec2 = upsample(128)(bottleneck)
    dec2 = layers.concatenate([enc2, dec2], axis=3)
    dec2 = conv3x3(128)(dec2)

    # Decoder stage 1: upsample, join with encoder stage 1, refine.
    dec1 = upsample(64)(dec2)
    dec1 = layers.concatenate([enc1, dec1], axis=3)
    dec1 = conv3x3(64)(dec1)

    # 1x1 convolution producing one sigmoid value per pixel.
    outputs = layers.Conv2D(1, 1, activation="sigmoid")(dec1)

    return models.Model(inputs, outputs)

# Build the network with the default input shape and print its layer summary.
unet_model = build_unet()
unet_model.summary()


### Add Dice Loss & IoU Metric

In [None]:
import tensorflow.keras.backend as K

# Dice Loss (better for segmentation)
def dice_loss(y_true, y_pred, smooth=1e-6):
    """Return one minus the smoothed Dice coefficient of the two tensors.

    Both inputs are cast to float32 and flattened to 1-D, so the overlap is
    measured over all elements at once. ``smooth`` is added to the numerator
    and the denominator so the ratio stays defined when both sums are zero.
    """
    truth = K.flatten(K.cast(y_true, dtype="float32"))
    guess = K.flatten(K.cast(y_pred, dtype="float32"))

    overlap = K.sum(truth * guess)
    total = K.sum(truth) + K.sum(guess)
    dice = (2. * overlap + smooth) / (total + smooth)
    return 1 - dice

# IoU Score (for evaluation)--> Intersection over Union
def iou_score(y_true, y_pred, smooth=1e-6):
    """Return the smoothed intersection-over-union of the two tensors.

    The inputs are cast to float32 and flattened; the intersection is the sum
    of their element-wise product and the union is the sum of both tensors
    minus that intersection. ``smooth`` guards against division by zero.
    """
    truth = K.flatten(K.cast(y_true, dtype="float32"))
    guess = K.flatten(K.cast(y_pred, dtype="float32"))

    overlap = K.sum(truth * guess)
    combined = K.sum(truth) + K.sum(guess) - overlap

    return (overlap + smooth) / (combined + smooth)

#Dice Loss--> Lower is better (perfect = 0)
#IoU-->Higher is better (perfect = 1)

### Compile the Model with:
Adam optimizer (adaptive learning) & Dice Loss (better for segmentation) & IoU as a metric

In [None]:
# Attach the training configuration: Adam optimiser, Dice loss as the objective,
# and accuracy plus IoU reported as metrics.
compile_settings = {
    "optimizer": "adam",
    "loss": dice_loss,
    "metrics": ["accuracy", iou_score],
}
unet_model.compile(**compile_settings)
#Optimizer → Controls how the model updates its weights during training.
#Loss Function → Measures how far the model’s predictions are from the actual values.
#Metrics → Additional evaluation metrics to monitor the model’s performance.



#customised version:
#from tensorflow.keras.optimizers import Adam

#unet_model.compile(optimizer=Adam(learning_rate=0.0001), loss=dice_loss, metrics=["accuracy", iou_score])


## Apply Data Augmentation for Better Generalization
To improve model accuracy and robustness, we use augmentation like:

Flipping images horizontally; Rotating up to 20 degrees; Changing brightness

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
#Data augmentation is a technique to artificially expand the dataset by applying 
#random transformations (e.g., flipping, rotating, etc.) to the input images.

# Create an ImageDataGenerator for augmentation.
# It carries only the geometric transforms, because those have to be applied
# identically to an image and to its mask. (Calling datagen.flow(X, Y) would
# transform X only and leave the masks unflipped / unrotated.)
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
)


class PairedAugmentationSequence(tf.keras.utils.Sequence):
    """Serve (image, mask) batches with the same random transform on both.

    Args:
        images: Array of input images, values expected in [0, 1].
        masks: Array of masks aligned with ``images`` along the first axis.
        generator: ImageDataGenerator that supplies the geometric transforms.
        batch_size: Number of samples per batch.
        brightness_range: ``(low, high)`` factor range applied to the images
            only, or None to disable. Done by hand because the generator's own
            brightness option rescales [0, 1] inputs to a 0-255 range.
        seed: Seed for the shuffling order and the brightness factors.
    """

    def __init__(self, images, masks, generator, batch_size=8,
                 brightness_range=(0.8, 1.2), seed=42):
        super().__init__()
        self.images = images
        self.masks = masks
        self.generator = generator
        self.batch_size = batch_size
        self.brightness_range = brightness_range
        self.rng = np.random.default_rng(seed)
        self.order = self.rng.permutation(len(images))

    def __len__(self):
        # Number of batches per epoch; the last batch may be smaller.
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, index):
        start = index * self.batch_size
        batch_images, batch_masks = [], []
        for sample in self.order[start:start + self.batch_size]:
            image, mask = self.images[sample], self.masks[sample]

            # Draw one set of transform parameters and reuse it for the mask.
            params = self.generator.get_random_transform(image.shape)
            image = self.generator.apply_transform(image, params)
            mask = self.generator.apply_transform(mask, params)

            # Brightness changes the photo only; clip to stay inside [0, 1].
            if self.brightness_range is not None:
                low, high = self.brightness_range
                image = np.clip(image * self.rng.uniform(low, high), 0.0, 1.0)

            batch_images.append(image)
            batch_masks.append(mask)
        return np.stack(batch_images), np.stack(batch_masks)

    def on_epoch_end(self):
        # Reshuffle so every epoch sees the samples in a new order.
        self.order = self.rng.permutation(len(self.images))


# Apply augmentation to training data
train_gen = PairedAugmentationSequence(X_train, Y_train, datagen, batch_size=8)


### Train the U-Net model for 30 epochs with augmentation

In [None]:
# train_gen already yields batches of 8, so batch_size must not be passed to
# fit() again for a generator input. validation_batch_size keeps the in-memory
# validation arrays evaluated in batches of 8 as before.
history = unet_model.fit(
    train_gen,
    validation_data=(X_val, Y_val),
    validation_batch_size=8,
    epochs=30,
)


#### How to read the IoU score
- IoU > 0.5 → Good segmentation!
- IoU ~ 0.3–0.5 → Decent, but needs improvement.
- IoU < 0.3 → The model is not learning well.



### Plot Loss & IoU

In [None]:
import matplotlib.pyplot as plt

# Draw the training curves: loss in the left panel, IoU in the right one.
plt.figure(figsize=(12, 4))

curve_specs = [
    # (history key, legend suffix, panel title, y-axis label)
    ('loss', 'Loss', 'Loss Over Epochs', 'Loss'),
    ('iou_score', 'IoU', 'IoU Over Epochs', 'IoU Score'),
]
for panel, (key, short_name, panel_title, y_label) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[key], label=f'Train {short_name}')
    plt.plot(history.history[f'val_{key}'], label=f'Val {short_name}')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()


### Visualize Predictions

In [None]:
import numpy as np
import random #to select random images from the validation set

# Select random validation images (indices are drawn with replacement)
num_samples = 10
sample_indices = [random.randint(0, len(X_val) - 1) for _ in range(num_samples)]

# Predict all selected images in one batched call instead of calling predict()
# once per image, then threshold the sigmoid outputs:
#   > 0.5  -> foreground (1)
#   <= 0.5 -> background (0)
predicted_masks = (unet_model.predict(X_val[sample_indices]) > 0.5).astype(np.uint8)

# One row per sample, three columns: original, true mask, predicted mask.
# squeeze=False keeps `axes` two-dimensional even when num_samples == 1.
fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4 * num_samples), squeeze=False)

for row, idx in enumerate(sample_indices):
    panels = (
        (X_val[idx], "Original Image", None),
        (Y_val[idx][:, :, 0], "Ground Truth Mask", "gray"),
        (predicted_masks[row][:, :, 0], "Predicted Mask", "gray"),
    )
    for ax, (picture, panel_title, cmap) in zip(axes[row], panels):
        ax.imshow(picture, cmap=cmap)  # masks are drawn in grayscale
        ax.set_title(panel_title)
        ax.axis("off")  # hide ticks and labels for a cleaner view

plt.show()


# Improved Code; Version two

In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

# Directory whose files are read by load_images(); each file is split into a
# left (image) and right (mask) half.
DATASET_PATH = "/kaggle/input/cityscapes-image-pairs/cityscapes_data/train"

# Size every image and mask is resized to with cv2.resize
# (smaller than the 256x256 used in the first version of this notebook).
IMAGE_SIZE = (128, 128)

def load_images(dataset_path, image_size):
    """Load every image pair in a directory as normalised image / mask arrays.

    Each file is read with OpenCV, converted to RGB and cut at its horizontal
    midpoint: the left half is the input image, the right half is reduced to a
    single grayscale channel and used as the mask. Both halves are resized to
    ``image_size`` and scaled to [0, 1].

    Args:
        dataset_path: Directory containing the combined image/mask files.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, masks)`` of float32 NumPy arrays in sorted filename
        order; ``masks`` has no channel axis.
    """
    images, masks = [], []

    for img_name in sorted(os.listdir(dataset_path)):
        img_path = os.path.join(dataset_path, img_name)

        # cv2.imread returns None for files it cannot decode; skip them
        # rather than crashing inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable file: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Split into left (image) and right (mask)
        half_width = img.shape[1] // 2
        img_left = cv2.resize(img[:, :half_width, :], image_size)
        img_right = cv2.resize(img[:, half_width:, :], image_size)

        # Normalise as float32 (half the memory of the default float64).
        img_left = img_left.astype(np.float32) / 255.0

        # Grayscale mask scaled to [0, 1]; it is already at image_size, so no
        # second resize is needed.
        img_right = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)
        img_right = img_right.astype(np.float32) / 255.0

        images.append(img_left)
        masks.append(img_right)

    return np.array(images), np.array(masks)

# Read the whole training directory, then give the masks a trailing channel axis.
X, Y = load_images(DATASET_PATH, IMAGE_SIZE)
Y = Y[..., np.newaxis]

# 80/20 train/validation split with a fixed seed for reproducibility.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

train_count, val_count = len(X_train), len(X_val)
print(f"✅ Loaded {train_count} training images and {val_count} validation images")


In [None]:
import tensorflow as tf

# Number of channels produced by the final 1x1 convolution of get_unet_model().
# With 1, the model outputs a single sigmoid map per image (binary segmentation).
num_classes = 1

def get_unet_model():
    """Build a four-level U-Net for 128x128 RGB inputs.

    The encoder runs four stages (64, 128, 256, 512 filters), each made of
    conv -> batch norm -> conv followed by 2x2 max-pooling and 20% dropout. A
    1024-filter stage forms the bottom of the network. The decoder mirrors the
    encoder: upsample, dropout, concatenate with the matching encoder output
    (a skip connection), then conv -> batch norm -> conv.

    The number of output channels is read from the module-level
    ``num_classes``; the output activation is a sigmoid.

    Returns:
        An uncompiled Keras model.
    """
    keras_layers = tf.keras.layers

    def double_conv(tensor, filters):
        # Two 3x3 ReLU convolutions with batch normalisation in between.
        tensor = keras_layers.Conv2D(
            filters, (3, 3), padding="same", activation="relu"
        )(tensor)
        tensor = keras_layers.BatchNormalization()(tensor)
        return keras_layers.Conv2D(
            filters, (3, 3), padding="same", activation="relu"
        )(tensor)

    inputs = keras_layers.Input(shape=(128, 128, 3))

    # Encoder: remember each stage's output before it is pooled.
    skip_tensors = []
    current = inputs
    for filters in (64, 128, 256, 512):
        current = double_conv(current, filters)
        skip_tensors.append(current)
        current = keras_layers.MaxPooling2D((2, 2))(current)
        current = keras_layers.Dropout(0.2)(current)

    # Deepest stage, no pooling afterwards.
    current = double_conv(current, 1024)

    # Decoder: walk the stored encoder outputs from deepest to shallowest.
    for filters, skip in zip((512, 256, 128, 64), reversed(skip_tensors)):
        current = keras_layers.UpSampling2D((2, 2))(current)
        current = keras_layers.Dropout(0.2)(current)
        current = keras_layers.Concatenate()([current, skip])
        current = double_conv(current, filters)

    # Output layer: 1x1 convolution with sigmoid activation.
    outputs = keras_layers.Conv2D(num_classes, (1, 1), activation="sigmoid")(current)

    return tf.keras.Model(inputs=inputs, outputs=outputs)

# Build the network and print its layer summary.
unet_model = get_unet_model()
unet_model.summary()


## Compile & Train the Model

In [None]:
import tensorflow.keras.backend as K

# Dice Loss Function
def dice_loss(y_true, y_pred, smooth=1e-6):
    """Dice loss: 1 - (2 * overlap + smooth) / (sum of both tensors + smooth).

    Inputs are cast to float32 and flattened before the sums are taken.
    """
    y_true = K.cast(y_true, dtype="float32")
    y_pred = K.cast(y_pred, dtype="float32")
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)

    intersection = K.sum(y_true * y_pred)
    numerator = 2. * intersection + smooth
    denominator = K.sum(y_true) + K.sum(y_pred) + smooth
    return 1 - numerator / denominator

# IoU Metric
def iou_score(y_true, y_pred, smooth=1e-6):
    """IoU metric: (overlap + smooth) / (union + smooth) on flattened float32 inputs."""
    y_true = K.cast(y_true, dtype="float32")
    y_pred = K.cast(y_pred, dtype="float32")
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)

    intersection = K.sum(y_true * y_pred)
    # Union = both sums minus the part that was counted twice.
    union = K.sum(y_true) + K.sum(y_pred) - intersection
    numerator = intersection + smooth
    denominator = union + smooth
    return numerator / denominator

# Configure training: Dice loss as objective, Adam as optimiser,
# accuracy and IoU as reported metrics.
unet_model.compile(
    loss=dice_loss,
    optimizer="adam",
    metrics=["accuracy", iou_score],
)


In [None]:
# Train on the in-memory arrays for 30 epochs in mini-batches of 8,
# with (X_val, Y_val) supplied as validation data.
history = unet_model.fit(
    x=X_train,
    y=Y_train,
    batch_size=8,
    epochs=30,
    validation_data=(X_val, Y_val),
)


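## Results
- IoU = 0.3677 → The model segments some regions correctly, but there is room for improvement.
- Loss (~ 0.46) → Acceptable, but it can be reduced further.
- Accuracy (~ 0.02) → Not a useful metric here and can be ignored: the target masks are grayscale values scaled to [0, 1] rather than exact 0/1 labels, so an exact-match accuracy is almost never satisfied.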

## Plot Loss & IoU Over Epochs

In [None]:
import matplotlib.pyplot as plt

# Two panels in one figure: loss curves on the left, IoU curves on the right.
plt.figure(figsize=(12, 4))
training_log = history.history

loss_axis = plt.subplot(1, 2, 1)
loss_axis.plot(training_log['loss'], label='Train Loss')
loss_axis.plot(training_log['val_loss'], label='Val Loss')
loss_axis.set_title('Loss Over Epochs')
loss_axis.set_xlabel('Epoch')
loss_axis.set_ylabel('Loss')
loss_axis.legend()

iou_axis = plt.subplot(1, 2, 2)
iou_axis.plot(training_log['iou_score'], label='Train IoU')
iou_axis.plot(training_log['val_iou_score'], label='Val IoU')
iou_axis.set_title('IoU Over Epochs')
iou_axis.set_xlabel('Epoch')
iou_axis.set_ylabel('IoU Score')
iou_axis.legend()

plt.show()


## Code to Display Predictions

In [None]:
import numpy as np
import random

# Select random validation images (indices are drawn with replacement)
num_samples = 30
sample_indices = [random.randint(0, len(X_val) - 1) for _ in range(num_samples)]

# One batched predict() call for all samples instead of one call per image;
# outputs above 0.5 are shown as foreground.
predicted_masks = (unet_model.predict(X_val[sample_indices]) > 0.5).astype(np.uint8)

# squeeze=False keeps `axes` two-dimensional even when num_samples == 1.
fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4 * num_samples), squeeze=False)

for row, idx in enumerate(sample_indices):
    panels = (
        (X_val[idx], "Original Image", None),
        (Y_val[idx][:, :, 0], "Ground Truth Mask", "gray"),
        (predicted_masks[row][:, :, 0], "Predicted Mask", "gray"),
    )
    for ax, (picture, panel_title, cmap) in zip(axes[row], panels):
        ax.imshow(picture, cmap=cmap)
        ax.set_title(panel_title)
        ax.axis("off")

plt.show()


# Third version

### Improved Model Code

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, BatchNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

# Define Dice Loss (Better for Segmentation)
def dice_loss(y_true, y_pred, smooth=1e-6):
    """Return 1 - Dice coefficient, computed over all elements of both tensors.

    Each tensor is cast to float32 and flattened first; ``smooth`` keeps the
    ratio defined when both tensors sum to zero.
    """
    flat_true, flat_pred = (
        K.flatten(K.cast(tensor, dtype="float32")) for tensor in (y_true, y_pred)
    )

    overlap = K.sum(flat_true * flat_pred)
    dice_coefficient = (2. * overlap + smooth) / (
        K.sum(flat_true) + K.sum(flat_pred) + smooth
    )
    return 1 - dice_coefficient

# Define IoU Metric
def iou_score(y_true, y_pred, smooth=1e-6):
    """Return the smoothed intersection-over-union of the flattened tensors."""
    flat_true, flat_pred = (
        K.flatten(K.cast(tensor, dtype="float32")) for tensor in (y_true, y_pred)
    )

    overlap = K.sum(flat_true * flat_pred)
    # Subtract the overlap once so it is not counted in both sums.
    covered = K.sum(flat_true) + K.sum(flat_pred) - overlap
    return (overlap + smooth) / (covered + smooth)

# 🔹 Improved Model Function
def create_model():
    """Build and compile a three-level U-Net for 128x128 RGB inputs.

    The input is batch-normalised, then passed through three encoder stages
    (two 3x3 convolutions, 2x2 max-pooling, 20% dropout), a two-convolution
    bottom stage, and three decoder stages (upsample, concatenate with the
    matching encoder output, two 3x3 convolutions). The output is a single
    sigmoid channel.

    Returns:
        A Keras model compiled with Adam (learning rate 0.0001), Dice loss and
        the metrics accuracy and IoU.
    """

    def conv(filters):
        # 3x3 ReLU convolution that keeps the spatial size.
        return Conv2D(filters, (3, 3), activation="relu", padding="same")

    inp = Input(shape=(128, 128, 3))
    current = BatchNormalization()(inp)

    # Encoder (downsampling): the first stage widens from 64 to 128 filters.
    encoder_outputs = []
    for first_filters, second_filters in ((64, 128), (128, 128), (128, 128)):
        current = conv(first_filters)(current)
        current = conv(second_filters)(current)
        encoder_outputs.append(current)
        current = MaxPooling2D((2, 2))(current)
        current = Dropout(0.2)(current)

    # Bottom of the "U".
    current = conv(128)(current)
    current = conv(128)(current)

    # Decoder (upsampling): reuse the encoder outputs from deepest to shallowest.
    for filters, skip in zip((128, 128, 64), reversed(encoder_outputs)):
        current = UpSampling2D((2, 2))(current)
        current = concatenate([skip, current])
        current = conv(filters)(current)
        current = conv(filters)(current)

    # Output layer (binary segmentation): one sigmoid value per pixel.
    outputs = Conv2D(1, (1, 1), activation="sigmoid")(current)

    model = Model(inp, outputs)
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss=dice_loss,
        metrics=["accuracy", iou_score],
    )
    return model

# Build the compiled model and print its layer summary.
model = create_model()
model.summary()


### Preprocess Data & Load Dataset

In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

# Directory whose files are read by load_images(); each file holds an image
# (left half) and its mask (right half).
DATASET_PATH = "/kaggle/input/cityscapes-image-pairs/cityscapes_data/train"

# Size every image and mask is resized to; matches the (128, 128, 3) input
# shape that create_model() declares.
IMAGE_SIZE = (128, 128)

def load_images(dataset_path, image_size):
    """Load every image pair in a directory as normalised image / mask arrays.

    Each file is read with OpenCV, converted to RGB and cut at its horizontal
    midpoint: the left half is the input image, the right half is reduced to a
    single grayscale channel and used as the mask. Both halves are resized to
    ``image_size`` and scaled to [0, 1].

    Args:
        dataset_path: Directory containing the combined image/mask files.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, masks)`` of float32 NumPy arrays in sorted filename
        order; ``masks`` has no channel axis.
    """
    images, masks = [], []

    for img_name in sorted(os.listdir(dataset_path)):
        img_path = os.path.join(dataset_path, img_name)

        # cv2.imread returns None for files it cannot decode; skip them
        # rather than crashing inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable file: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Split into left (image) and right (mask)
        half_width = img.shape[1] // 2
        img_left = cv2.resize(img[:, :half_width, :], image_size)
        img_right = cv2.resize(img[:, half_width:, :], image_size)

        # Normalise as float32 (half the memory of the default float64).
        img_left = img_left.astype(np.float32) / 255.0

        # Grayscale mask scaled to [0, 1]; it is already at image_size, so no
        # second resize is needed.
        img_right = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)
        img_right = img_right.astype(np.float32) / 255.0

        images.append(img_left)
        masks.append(img_right)

    return np.array(images), np.array(masks)

# Load all images and masks, then turn the masks into 4-D arrays by adding a
# channel axis of length one at the end.
X, Y = load_images(DATASET_PATH, IMAGE_SIZE)
Y = np.expand_dims(Y, axis=-1)

# Keep 20% of the samples aside for validation (seeded, hence reproducible).
split_arrays = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_val, Y_train, Y_val = split_arrays

print(f"✅ Loaded {len(X_train)} training images and {len(X_val)} validation images")


### Train the Model

In [None]:
# Fit the compiled model on the training arrays: 30 epochs, mini-batches of 8,
# with the held-out arrays passed as validation data.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=8,
    epochs=30,
    validation_data=(X_val, Y_val),
)


### Plot Training Performance

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded training history: one panel per metric, each with the
# training curve and the validation curve.
plt.figure(figsize=(12, 4))

panel_text = {
    # history key: (legend suffix, panel title, y-axis label)
    'loss': ('Loss', 'Loss Over Epochs', 'Loss'),
    'iou_score': ('IoU', 'IoU Over Epochs', 'IoU Score'),
}
for position, metric in enumerate(panel_text, start=1):
    legend_suffix, panel_title, y_label = panel_text[metric]
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Train {legend_suffix}')
    plt.plot(history.history[f'val_{metric}'], label=f'Val {legend_suffix}')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()


### Visualize Model Predictions

In [None]:
# Draw one validation sample at random and compare its true and predicted masks.
idx = np.random.randint(0, len(X_val))
test_image, true_mask = X_val[idx], Y_val[idx]

# predict() expects a batch, so add a leading axis and take the single result
# back out; outputs above 0.5 are shown as foreground.
predicted_mask = model.predict(np.expand_dims(test_image, axis=0))[0]
predicted_mask = (predicted_mask > 0.5).astype(np.uint8)

plt.figure(figsize=(12, 4))
panels = [
    (test_image, "Original Image", None),
    (true_mask[:, :, 0], "Ground Truth Mask", "gray"),
    (predicted_mask[:, :, 0], "Predicted Mask", "gray"),
]
for position, (picture, panel_title, cmap) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(picture, cmap=cmap)
    plt.title(panel_title)

plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Select 20 random validation images (indices are drawn with replacement)
num_samples = 20
sample_indices = np.random.randint(0, len(X_val), size=num_samples)

# Predict all selected images in one batched call instead of one predict()
# call per image; outputs above 0.5 are shown as foreground.
predicted_masks = (model.predict(X_val[sample_indices]) > 0.5).astype(np.uint8)

# squeeze=False keeps `axes` two-dimensional even when num_samples == 1.
fig, axes = plt.subplots(num_samples, 3, figsize=(12, 4 * num_samples), squeeze=False)

for i, idx in enumerate(sample_indices):
    panels = (
        (X_val[idx], f"Original Image {i+1}", None),
        (Y_val[idx][:, :, 0], "Ground Truth Mask", "gray"),
        (predicted_masks[i][:, :, 0], "Predicted Mask", "gray"),
    )
    for ax, (picture, panel_title, cmap) in zip(axes[i], panels):
        ax.imshow(picture, cmap=cmap)
        ax.set_title(panel_title)
        ax.axis("off")

plt.tight_layout()
plt.show()


# 4th Version, UNET Multiclass-Segmentation

### Step 1

In [None]:
# Step 1: Import Required Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import cv2
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## step 2

In [None]:
# Step 2: Set Constants
# Directory whose files are read by load_images().
DATASET_PATH = '/kaggle/input/cityscapes-image-pairs/cityscapes_data/train'
# Size every image and mask is resized to with cv2.resize.
IMAGE_SIZE = (256, 256)
# Number of classes used when one-hot encoding the masks in load_images().
NUM_CLASSES = 5  # Adjust based on how many classes you want to segment

## Step 3: Load the whole dataset into memory (this cell fails with an out-of-memory error)

In [None]:
#Output: RAM Error Message
# Step 3: Load and Process Dataset
def load_images(dataset_path, image_size):
    """Load every image pair as normalised images and one-hot class masks.

    Each file is read with OpenCV, converted to RGB and cut at its horizontal
    midpoint. The left half is the input image. The right half is converted to
    grayscale and its intensity is binned into ``NUM_CLASSES`` equal-width
    ranges, which simulates multi-class labels; the result is one-hot encoded.

    The whole dataset is held in memory at once. Images are kept as float32
    and label maps as uint8 to reduce the footprint, but the one-hot array can
    still be large for many files.

    Args:
        dataset_path: Directory containing the combined image/mask files.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        A tuple ``(images, masks)``: float32 images scaled to [0, 1] and the
        one-hot masks produced by ``tf.keras.utils.to_categorical`` with
        ``NUM_CLASSES`` channels.
    """
    # Equal-width intensity bins over 0-255; for NUM_CLASSES == 5 these are
    # exactly 51, 102, 153 and 204.
    bin_edges = np.arange(1, NUM_CLASSES) * (255 / NUM_CLASSES)

    images, masks = [], []

    for img_name in sorted(os.listdir(dataset_path)):
        img_path = os.path.join(dataset_path, img_name)

        # cv2.imread returns None for files it cannot decode; skip them
        # rather than crashing inside cvtColor.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable file: {img_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Split into left (image) and right (mask)
        half_width = img.shape[1] // 2
        img_left = cv2.resize(img[:, :half_width, :], image_size)
        # Nearest-neighbour resizing keeps original mask colours instead of
        # blending neighbouring ones into values that belong to no class.
        img_right = cv2.resize(
            img[:, half_width:, :], image_size, interpolation=cv2.INTER_NEAREST
        )

        # Normalize input image as float32 (half the memory of float64)
        img_left = img_left.astype(np.float32) / 255.0

        # Convert mask to class indices 0 .. NUM_CLASSES - 1; uint8 instead of
        # the int64 that np.digitize returns.
        mask = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)
        mask = np.digitize(mask, bins=bin_edges).astype(np.uint8)

        images.append(img_left)
        masks.append(mask)

    images = np.array(images)
    masks = np.array(masks)

    # One-hot encode masks
    masks = tf.keras.utils.to_categorical(masks, num_classes=NUM_CLASSES)

    return images, masks

# Load the full dataset into memory, then split it 80/20 with a fixed seed.
X, Y = load_images(DATASET_PATH, IMAGE_SIZE)
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

train_count, val_count = X_train.shape[0], X_val.shape[0]
print(f"✅ Loaded {train_count} training images and {val_count} validation images.")



In [None]:
## Step 3: Load and Process Dataset
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
import cv2
import os
import random

# Constants
# NOTE(review): these look like the intended constructor arguments for
# CityscapesGenerator (image_size, num_classes, batch_size); the call site is
# not shown here, so confirm against it.
IMAGE_SIZE = (256, 256)
NUM_CLASSES = 5
BATCH_SIZE = 8

# Directory whose files the generator lists and reads batch by batch.
DATASET_PATH = "/kaggle/input/cityscapes-image-pairs/cityscapes_data/train"

# Data Generator
class CityscapesGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads image/label pairs from disk batch by batch.

    Every file in ``dataset_path`` is expected to hold the input photo in its
    left half and the colour-coded label image in its right half. Only one
    batch is held in memory at a time.

    The file list is shuffled with a seeded RNG before it is split, so two
    generators built with the same ``seed`` and ``split_ratio`` (one with
    ``mode="train"``, one with any other mode) see disjoint subsets. With an
    unseeded shuffle each instance would split a different permutation and
    the validation set would overlap the training set.

    Args:
        dataset_path: Directory containing the side-by-side image files.
        image_size: Target size handed to ``cv2.resize`` (OpenCV interprets
            it as ``(width, height)``).
        batch_size: Number of files per batch (the last batch may be shorter).
        num_classes: Depth of the one-hot mask encoding. The intensity
            bucketing produces class ids 0..4, so this should be at least 5.
        mode: ``"train"`` selects the first ``split_ratio`` share of the
            shuffled files; any other value selects the remainder.
        split_ratio: Fraction of files assigned to the training subset.
        seed: Seed for the shuffle that precedes the split. Must be the same
            for the train and validation generators.
        **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    def __init__(self, dataset_path, image_size, batch_size, num_classes,
                 mode="train", split_ratio=0.8, seed=42, **kwargs):
        # Newer Keras versions warn when the base constructor is not called.
        super().__init__(**kwargs)
        self.dataset_path = dataset_path
        self.image_size = image_size
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.mode = mode

        # Sort first so the seeded shuffle starts from a deterministic order,
        # then shuffle with a private RNG (does not disturb global `random`).
        files = sorted(os.listdir(dataset_path))
        random.Random(seed).shuffle(files)

        # Split train/validation
        split_point = int(len(files) * split_ratio)
        if self.mode == "train":
            self.image_files = files[:split_point]
        else:
            self.image_files = files[split_point:]

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.image_files) / self.batch_size))

    def __getitem__(self, index):
        """Read and preprocess batch ``index``.

        Returns:
            ``(images, masks)``: a float32 array of inputs scaled to
            ``[0, 1]`` and the matching one-hot masks.

        Raises:
            ValueError: If a file in the batch cannot be decoded as an image.
        """
        start = index * self.batch_size
        batch_files = self.image_files[start:start + self.batch_size]
        images, masks = [], []

        for img_name in batch_files:
            img_path = os.path.join(self.dataset_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread returns None instead of raising; fail with a
                # message that names the file rather than inside cvtColor.
                raise ValueError(f"Could not read image file: {img_path}")
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Split image into left (input) and right (mask)
            half_width = img.shape[1] // 2
            img_left = cv2.resize(img[:, :half_width, :], self.image_size)
            img_right = cv2.resize(img[:, half_width:, :], self.image_size)

            # Scale to [0, 1] in float32 (plain division would yield float64).
            images.append(img_left.astype(np.float32) / 255.0)

            # Bucket grayscale intensity into class ids 0..4, then one-hot.
            gray = cv2.cvtColor(img_right, cv2.COLOR_RGB2GRAY)
            class_ids = np.digitize(gray, bins=[51, 102, 153, 204])
            masks.append(to_categorical(class_ids, num_classes=self.num_classes))

        return np.array(images, dtype=np.float32), np.array(masks)

# Create Generators
# mode="train" keeps the leading split_ratio share (default 0.8) of the file
# list; any other mode string (here "val") keeps the remaining files.
train_generator = CityscapesGenerator(DATASET_PATH, IMAGE_SIZE, BATCH_SIZE, NUM_CLASSES, mode="train")
val_generator = CityscapesGenerator(DATASET_PATH, IMAGE_SIZE, BATCH_SIZE, NUM_CLASSES, mode="val")

# len(generator) is the number of batches, not the number of images.
print(f"✅ Data Generators Ready: {len(train_generator)} batches for training, {len(val_generator)} for validation.")


## Step 4

In [None]:
# Step 4: Build U-Net Model for Multi-Class Segmentation

import tensorflow as tf
from tensorflow.keras import layers, models

def build_unet(input_shape=(256, 256, 3), num_classes=NUM_CLASSES):
    """Assemble a small two-level U-Net for per-pixel multi-class prediction.

    The network has two encoder stages (64 and 128 filters), a 256-filter
    bottleneck with dropout, and two decoder stages that upsample and
    concatenate the matching encoder features. The final 1x1 convolution
    applies a softmax over ``num_classes`` channels.

    Args:
        input_shape: Shape of one input sample, passed to ``layers.Input``.
        num_classes: Number of output channels of the softmax layer.

    Returns:
        The uncompiled ``models.Model`` mapping inputs to class probabilities.
    """

    def encoder_block(tensor, n_filters):
        # conv -> batch-norm -> conv; returns (pooled output, pre-pool features).
        tensor = layers.Conv2D(n_filters, 3, padding='same', activation='relu')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        features = layers.Conv2D(n_filters, 3, padding='same', activation='relu')(tensor)
        return layers.MaxPooling2D((2, 2))(features), features

    def decoder_block(tensor, skip, n_filters):
        # Stride-2 transposed conv doubles the resolution before the skip merge.
        upsampled = layers.Conv2DTranspose(
            n_filters, 3, strides=2, padding='same', activation='relu'
        )(tensor)
        merged = layers.concatenate([upsampled, skip])
        return layers.Conv2D(n_filters, 3, padding='same', activation='relu')(merged)

    inputs = layers.Input(shape=input_shape)

    # Encoder: keep each stage's pre-pool features for the skip connections.
    skips = []
    current = inputs
    for n_filters in (64, 128):
        current, features = encoder_block(current, n_filters)
        skips.append(features)

    # Bottleneck
    current = layers.Conv2D(256, 3, activation='relu', padding='same')(current)
    current = layers.Dropout(0.3)(current)

    # Decoder: walk the skips deepest-first, mirroring the encoder widths.
    for n_filters, skip in zip((128, 64), reversed(skips)):
        current = decoder_block(current, skip, n_filters)

    # Output layer (softmax for multi-class segmentation)
    outputs = layers.Conv2D(num_classes, 1, activation='softmax')(current)

    return models.Model(inputs, outputs)

# Build the model with its defaults (256x256x3 input, NUM_CLASSES outputs).
unet_model = build_unet()
# categorical_crossentropy pairs with the softmax output and one-hot masks.
unet_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Print the layer-by-layer architecture.
unet_model.summary()


### Checking GPU availability

In [None]:
import tensorflow as tf
# Count the GPU devices TensorFlow can see (0 means no GPU is visible to it).
print("Number of available GPUs: ", len(tf.config.list_physical_devices('GPU')))


In [None]:
# Cross-check CUDA visibility through PyTorch; torch is not used by the
# other cells visible in this part of the notebook.
import torch
print(torch.cuda.is_available())

## Step 5

In [None]:
# Step 5: Train the Model
# Train the model using data generators
# Batches are read from disk on demand by the generators. One epoch is set to
# exactly one pass over each generator (steps == number of batches).
history = unet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)


## Step 6

In [None]:
# Step 6: Define Function to Visualize Predictions
# Display palette: one RGB colour per class index.
colors = [
    (255, 0, 0),    # Class 0: Red
    (0, 255, 0),    # Class 1: Green
    (0, 0, 255),    # Class 2: Blue
    (255, 255, 0),  # Class 3: Yellow
    (255, 0, 255)   # Class 4: Magenta
]


def decode_segmentation(mask):
    """Convert a one-hot mask into an RGB image for display.

    Args:
        mask: Array of shape ``(height, width, channels)`` in which a value
            of 1 in channel ``i`` marks the pixel as class ``i``.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. Pixels with no
        channel equal to 1 stay black; if several channels equal 1 the
        highest class index wins because it is painted last.
    """
    h, w, num_channels = mask.shape
    color_mask = np.zeros((h, w, 3), dtype=np.uint8)

    # Bound the loop by the mask's own depth and the palette length instead
    # of a global class count, so a mismatch cannot raise IndexError.
    for class_idx in range(min(num_channels, len(colors))):
        color_mask[mask[:, :, class_idx] == 1] = colors[class_idx]

    return color_mask


## Step 7

In [None]:
import matplotlib.pyplot as plt

# Redefined here so this cell can run on its own: converts a one-hot mask to RGB
def decode_segmentation(mask):
    """Convert a one-hot mask into an RGB image for display.

    Args:
        mask: Array of shape ``(height, width, channels)`` in which a value
            of 1 in channel ``i`` marks the pixel as class ``i``.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``. Pixels with no
        channel equal to 1 stay black; if several channels equal 1 the
        highest class index wins because it is painted last.
    """
    colors = [
        (255, 0, 0),    # Class 0: Red
        (0, 255, 0),    # Class 1: Green
        (0, 0, 255),    # Class 2: Blue
        (255, 255, 0),  # Class 3: Yellow
        (255, 0, 255)   # Class 4: Magenta
    ]

    h, w, num_channels = mask.shape
    color_mask = np.zeros((h, w, 3), dtype=np.uint8)

    # Bound the loop by the mask's own depth and the palette length instead
    # of a global class count, so a mismatch cannot raise IndexError.
    for class_idx in range(min(num_channels, len(colors))):
        color_mask[mask[:, :, class_idx] == 1] = colors[class_idx]

    return color_mask

# Display Predictions Using the Generator
# One figure row per sample: input image | ground-truth mask | predicted mask.
num_samples = 5
fig, axes = plt.subplots(num_samples, 3, figsize=(12, 5 * num_samples))

for i in range(num_samples):
    # Randomly select a batch from the validation generator
    # (the batch index is drawn independently each time, so a batch can repeat)
    test_image, true_mask = val_generator[np.random.randint(0, len(val_generator))]

    # Take the first sample from the batch
    test_image = test_image[0]
    true_mask = true_mask[0]

    # Predict mask
    # np.newaxis adds a leading batch axis of size 1; [0] removes it again.
    pred_mask = unet_model.predict(test_image[np.newaxis, ...])[0]
    # Turn per-pixel class scores into a 0/1 mask by flagging every channel
    # that equals the per-pixel maximum (exact ties flag more than one channel).
    pred_mask = (pred_mask == pred_mask.max(axis=-1, keepdims=True)).astype(np.uint8)

    # Visualize results
    axes[i, 0].imshow(test_image)
    axes[i, 0].set_title("Original Image")
    axes[i, 0].axis("off")

    axes[i, 1].imshow(decode_segmentation(true_mask))
    axes[i, 1].set_title("Ground Truth Mask")
    axes[i, 1].axis("off")

    axes[i, 2].imshow(decode_segmentation(pred_mask))
    axes[i, 2].set_title("Predicted Mask")
    axes[i, 2].axis("off")

plt.tight_layout()
plt.show()


# Loss function and IoU

In [None]:
import tensorflow.keras.backend as K

def iou_score(y_true, y_pred, smooth=1e-6):
    """Return a smoothed intersection-over-union of two tensors.

    Both inputs are cast to float32 and reduced with a single global sum, so
    the result is one scalar over every element passed in (no per-class or
    per-sample averaging). ``y_pred`` is used as-is, without thresholding.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor, multiplied element-wise with ``y_true``.
        smooth: Added to numerator and denominator to avoid division by zero.

    Returns:
        ``(intersection + smooth) / (union + smooth)`` as a scalar tensor.
    """
    truth = K.cast(y_true, dtype='float32')
    prediction = K.cast(y_pred, dtype='float32')

    overlap = K.sum(truth * prediction)
    # Union = |A| + |B| - |A ∩ B|
    combined = K.sum(truth) + K.sum(prediction) - overlap

    numerator = overlap + smooth
    denominator = combined + smooth
    return numerator / denominator


In [None]:
# Re-compile the existing unet_model object so iou_score is tracked alongside
# accuracy; the optimizer and loss settings are unchanged.
unet_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', iou_score])


In [None]:
# Train the U-Net model with IoU metric
# NOTE(review): this calls fit() on the same unet_model object trained in
# Step 5, so it presumably continues from those weights rather than starting
# fresh - rebuild the model first if a clean run is intended.
# `history` is overwritten with the results of this run.
history = unet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator)
)


In [None]:
import matplotlib.pyplot as plt

# Function to plot training history
def plot_history(history):
    """Plot loss and IoU curves from a training ``history`` object.

    Reads ``history.history``: the ``'loss'`` and ``'iou_score'`` entries are
    required (a missing key raises ``KeyError``), while ``'val_loss'`` and
    ``'val_iou_score'`` are optional and only drawn when non-empty. Draws one
    figure with two side-by-side panels and shows it.

    Args:
        history: Object exposing a ``history`` dict of per-epoch metric lists.
    """
    # Pull every series up front so a missing key fails before any plotting.
    records = history.history
    train_loss = records['loss']
    valid_loss = records.get('val_loss', [])
    train_iou = records['iou_score']
    valid_iou = records.get('val_iou_score', [])

    # 1-based epoch axes; both validation curves use the val_loss length.
    train_axis = range(1, len(train_loss) + 1)
    valid_axis = range(1, len(valid_loss) + 1)

    # (subplot slot, train series, val series, train label, val label, title, y-label)
    panels = (
        (1, train_loss, valid_loss,
         'Training Loss', 'Validation Loss', 'Loss Over Epochs', 'Loss'),
        (2, train_iou, valid_iou,
         'Training IoU', 'Validation IoU', 'IoU Over Epochs', 'IoU Score'),
    )

    plt.figure(figsize=(12, 5))
    for slot, train_series, valid_series, train_label, valid_label, title, y_label in panels:
        plt.subplot(1, 2, slot)
        plt.plot(train_axis, train_series, 'r', label=train_label)
        if valid_series:
            plt.plot(valid_axis, valid_series, 'b', label=valid_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.show()

# Call the plot function
# `history` must come from a fit() run compiled with the iou_score metric,
# because plot_history reads history.history['iou_score'] unconditionally.
plot_history(history)
