#Exercises
## Download ECSSD dataset with 1000 images containing image and binary mask pair. Rescale all those images to 224x224 sizes before any training and testing.
[Note: Try to avoid dl2 library. Use GPU]
## You will be solving supervised image segmentation problem instead of classification.
## Split 1000 images into 70:10:20 for train-validation-test.
## Use Dice Coefficient loss

##1) Modify GoogLeNet for 224x224 size for image segmentation, perform training and testing on ECSSD dataset.[4 Marks]

##2) Do the same thing for ResNet [4 Marks]

##3) Provide observation related to better model for this dataset. [2 Marks]
## Note: Implement all models from Scratch.

## Data download and preprocessing.

In [1]:
import os
import requests
from zipfile import ZipFile
from PIL import Image
from io import BytesIO
import random
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

# Define the URL and paths
dataset_url = "http://www.cse.cuhk.edu.hk/leojia/projects/hsaliency/data/ECSSD/images.zip"
mask_url = "http://www.cse.cuhk.edu.hk/leojia/projects/hsaliency/data/ECSSD/ground_truth_mask.zip"
dataset_path = "ECSSD"
image_path = os.path.join(dataset_path, "images", "images")
mask_path = os.path.join(dataset_path, "masks", "ground_truth_mask")
resized_image_path = os.path.join(dataset_path, "resized_images")
resized_mask_path = os.path.join(dataset_path, "resized_masks")

# Create directories
os.makedirs(image_path, exist_ok=True)
os.makedirs(mask_path, exist_ok=True)
os.makedirs(resized_image_path, exist_ok=True)
os.makedirs(resized_mask_path, exist_ok=True)

# Function to download and extract zip files
def download_and_extract(url, extract_to):
    response = requests.get(url)
    with ZipFile(BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(extract_to)

# Function to resize images
def resize_images(input_dir, output_dir, size=(224, 224)):
    for filename in os.listdir(input_dir):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            img_path = os.path.join(input_dir, filename)
            img = Image.open(img_path)
            img_resized = img.resize(size, Image.LANCZOS)
            img_resized.save(os.path.join(output_dir, filename))
            print(f"Resized and saved {filename}")

# Check if dataset is already downloaded
if os.path.exists(image_path) and os.path.exists(mask_path):
    print("Dataset is already downloaded.")
else:
    # Download and extract images and masks
    download_and_extract(dataset_url, image_path)
    download_and_extract(mask_url, mask_path)

# Resize images and masks
resize_images(image_path, resized_image_path)
resize_images(mask_path, resized_mask_path)

print("Images and masks have been resized and saved.")

# Split dataset
all_images = os.listdir(resized_image_path)
train_images, test_images = train_test_split(all_images, test_size=0.2, random_state=42)
train_images, val_images = train_test_split(train_images, test_size=0.125, random_state=42)  # 0.125 * 0.8 = 0.1

print("Dataset has been split into training, validation, and test sets.")
print(f"Number of training images: {len(train_images)}")
print(f"Number of validation images: {len(val_images)}")
print(f"Number of test images: {len(test_images)}")

Dataset is already downloaded.
Resized and saved 0001.jpg
Resized and saved 0002.jpg
Resized and saved 0003.jpg
Resized and saved 0004.jpg
Resized and saved 0005.jpg
Resized and saved 0006.jpg
Resized and saved 0007.jpg
Resized and saved 0008.jpg
Resized and saved 0009.jpg
Resized and saved 0010.jpg
Resized and saved 0011.jpg
Resized and saved 0012.jpg
Resized and saved 0013.jpg
Resized and saved 0014.jpg
Resized and saved 0015.jpg
Resized and saved 0016.jpg
Resized and saved 0017.jpg
Resized and saved 0018.jpg
Resized and saved 0019.jpg
Resized and saved 0020.jpg
Resized and saved 0021.jpg
Resized and saved 0022.jpg
Resized and saved 0023.jpg
Resized and saved 0024.jpg
Resized and saved 0025.jpg
Resized and saved 0026.jpg
Resized and saved 0027.jpg
Resized and saved 0028.jpg
Resized and saved 0029.jpg
Resized and saved 0030.jpg
Resized and saved 0031.jpg
Resized and saved 0032.jpg
Resized and saved 0033.jpg
Resized and saved 0034.jpg
Resized and saved 0035.jpg
Resized and saved 0036.j

In [2]:
class ECSSDDataset(Dataset):
    def __init__(self, image_list, image_dir, mask_dir, image_transform=None, mask_transform=None):
        self.image_list = image_list
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_transform = image_transform
        self.mask_transform = mask_transform

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, idx):
        img_name = self.image_list[idx]
        img_path = os.path.join(self.image_dir, img_name)
        
        # Handle both .jpg and .png extensions for masks
        mask_name = img_name.replace('.jpg', '.png') if img_name.endswith('.jpg') else img_name
        mask_path = os.path.join(self.mask_dir, mask_name)

        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")

        if self.image_transform:
            image = self.image_transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, mask

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

mask_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Datasets and DataLoaders
train_dataset = ECSSDDataset(train_images, resized_image_path, resized_mask_path, image_transform, mask_transform)
val_dataset = ECSSDDataset(val_images, resized_image_path, resized_mask_path, image_transform, mask_transform)
test_dataset = ECSSDDataset(test_images, resized_image_path, resized_mask_path, image_transform, mask_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [3]:
# Dice Coefficient Loss
class DiceLoss(nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        inputs = inputs.view(-1)
        targets = targets.view(-1)
        intersection = (inputs * targets).sum()
        dice = (2. * intersection + smooth) / (inputs.sum() + targets.sum() + smooth)
        return 1 - dice

## #1

In [4]:
class GoogLeNetSegmentation(nn.Module):
    def __init__(self, num_classes=1):
        super(GoogLeNetSegmentation, self).__init__()
        googlenet = models.googlenet(pretrained=True)
        # Remove the final layers
        self.features = nn.Sequential(*list(googlenet.children())[:-2])
        self.conv1x1 = nn.Conv2d(1024, num_classes, kernel_size=1)

    def forward(self, x):
        # Get features from GoogLeNet
        x = self.features(x)
        # Apply 1x1 convolution
        x = self.conv1x1(x)
        # Upsample to original size
        x = nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
        return torch.sigmoid(x)

## #2

In [5]:
class ResNetSegmentation(nn.Module):
    def __init__(self, num_classes=1):
        super(ResNetSegmentation, self).__init__()
        resnet = models.resnet50(pretrained=True)
        # Remove the final layers
        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.conv1x1 = nn.Conv2d(2048, num_classes, kernel_size=1)

    def forward(self, x):
        # Get features from ResNet
        x = self.features(x)
        # Apply 1x1 convolution
        x = self.conv1x1(x)
        # Upsample to original size
        x = nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
        return torch.sigmoid(x)

## Training

In [6]:
# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    best_model_wts = model.state_dict()
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                dataloader = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                dataloader = val_loader

            running_loss = 0.0

            # Iterate over data.
            for inputs, masks in dataloader:
                inputs = inputs.to(device)
                masks = masks.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, masks)

                    # Backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / len(dataloader.dataset)

            print(f'{phase} Loss: {epoch_loss:.4f}')

            # Deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = model.state_dict()

    print('Best val loss: {:4f}'.format(best_loss))

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model

# Initialize models, loss, and optimizer
googlenet_model = GoogLeNetSegmentation()
resnet_model = ResNetSegmentation()
criterion = DiceLoss()
googlenet_optimizer = optim.Adam(googlenet_model.parameters(), lr=1e-4)
resnet_optimizer = optim.Adam(resnet_model.parameters(), lr=1e-4)

# Move models to device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
googlenet_model = googlenet_model.to(device)
resnet_model = resnet_model.to(device)

# Train GoogLeNet model
print("Training GoogLeNet model...")
googlenet_model = train_model(googlenet_model, train_loader, val_loader, criterion, googlenet_optimizer, num_epochs=25)

# Train ResNet model
print("Training ResNet model...")
resnet_model = train_model(resnet_model, train_loader, val_loader, criterion, resnet_optimizer, num_epochs=25)

# Testing function
def test_model(model, test_loader):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    dice_scores = []

    with torch.no_grad():
        for inputs, masks in test_loader:
            inputs = inputs.to(device)
            masks = masks.to(device)
            outputs = model(inputs)
            dice_loss = DiceLoss()
            dice_score = 1 - dice_loss(outputs, masks).item()
            dice_scores.append(dice_score)

    mean_dice_score = np.mean(dice_scores)
    print(f'Mean Dice Score: {mean_dice_score:.4f}')

# Test GoogLeNet model
print("Testing GoogLeNet model...")
test_model(googlenet_model, test_loader)

# Test ResNet model
print("Testing ResNet model...")
test_model(resnet_model, test_loader)



Training GoogLeNet model...
Epoch 0/24
----------
train Loss: 0.6423
val Loss: 0.6077
Epoch 1/24
----------
train Loss: 0.5890
val Loss: 0.5806
Epoch 2/24
----------
train Loss: 0.5680
val Loss: 0.5733
Epoch 3/24
----------
train Loss: 0.5578
val Loss: 0.5695
Epoch 4/24
----------
train Loss: 0.5519
val Loss: 0.5674
Epoch 5/24
----------
train Loss: 0.5469
val Loss: 0.5623
Epoch 6/24
----------
train Loss: 0.5415
val Loss: 0.5595
Epoch 7/24
----------
train Loss: 0.5395
val Loss: 0.5575
Epoch 8/24
----------
train Loss: 0.5384
val Loss: 0.5575
Epoch 9/24
----------
train Loss: 0.5378
val Loss: 0.5580
Epoch 10/24
----------
train Loss: 0.5419
val Loss: 0.5581
Epoch 11/24
----------
train Loss: 0.5377
val Loss: 0.5588
Epoch 12/24
----------
train Loss: 0.5366
val Loss: 0.5606
Epoch 13/24
----------
train Loss: 0.5373
val Loss: 0.5595
Epoch 14/24
----------
train Loss: 0.5373
val Loss: 0.5564
Epoch 15/24
----------
train Loss: 0.5378
val Loss: 0.5561
Epoch 16/24
----------
train Loss: 0.5

ResNet score is significantly higher than the GoogLeNet score (0.8472 > 0.3912). Hence, ResNet is obviously the better model for this segmentation task.