# COMP9517 Group Project

### Model 2 : DeepLabV3+

Please run this file after dataVisualisation.ipynb.

Please run all the cells in this file in order and follow the instructions given along the way. The purpose of this notebook is to train and evaluate a DeepLabV3+ model on the turtle dataset. The data is loaded from models/dataset.pth, which was created in dataVisualisation.ipynb.

#### DataLoader

First, we will load the data splits we created in dataVisualisation.ipynb. To do this, the custom SeaTurtleDataset class we wrote for this dataset must be defined here as well, so we simply copy its definition into this notebook.

In [None]:
###################################################################
# 
# Nothing new here, just same as dataVisualisation.ipynb.
# 
###################################################################

from pycocotools.coco import COCO
import os
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torch
from torch.utils.data import Dataset
import numpy as np
from PIL import Image


# Preprocessing pipeline shared by every sample. The bounding boxes are passed
# through the same pipeline (COCO format, labels supplied as `category_ids`)
# so that they stay aligned with the resized image.
resize_transform = A.Compose(
    transforms=[
        # Always resize to 512 x 512
        A.Resize(height=512, width=512, p=1),
        # Normalise pixel values with a per-channel mean and std
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        # Convert to a PyTorch tensor
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="coco", label_fields=["category_ids"]),
)



class SeaTurtleDataset(Dataset):
    """COCO-annotated image dataset yielding an image, its boxes and a class mask.

    Each item is a dict with the keys ``image``, ``bboxes``, ``masks``,
    ``category_ids``, ``original_size`` and ``target_size``. ``masks`` is a
    single 2-D label map in which 0 is background and 1 / 2 / 3 mark the
    turtle / flipper / head categories.

    Args:
        root: Directory that the COCO ``file_name`` entries are relative to.
        annotation: Path to the COCO annotation file.
        transforms: Optional albumentations pipeline called with ``image``,
            ``bboxes``, ``masks`` and ``category_ids``.
        target_size: Size the samples are expected to have after ``transforms``;
            compared against PIL's ``Image.size`` of the loaded image.
    """

    def __init__(self, root, annotation, transforms=None, target_size=(512, 512)):
        self.root = root
        self.coco = COCO(annotation)
        self.transforms = transforms
        self.cat_ids = self.coco.getCatIds()
        self.img_ids = list(self.coco.imgs.keys())
        self.target_size = target_size

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Load the ``idx``-th image together with its boxes, labels and mask."""
        # Image params
        img_id = self.img_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        img_path = os.path.join(self.root, img_info["file_name"])
        img = Image.open(img_path).convert("RGB")

        # Annotations
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)

        # BBox and category IDs
        bbox = [ann["bbox"] for ann in anns]
        category_ids = [ann["category_id"] for ann in anns]

        original_size = img.size  # as reported by PIL
        image = np.array(img)

        # Label map built from the segmentation annotations
        masks = self._getmask(img_id, image)

        # NOTE(review): an image that already has `target_size` skips the
        # whole pipeline, including any normalisation step it contains, and is
        # returned as raw float32 pixel values instead. Confirm this is
        # intended before relying on mixed-size inputs.
        if self.transforms and original_size != self.target_size:
            transformed = self.transforms(
                image=image, bboxes=bbox, masks=[masks], category_ids=category_ids
            )
            image = transformed["image"]  # Resized image
            bbox = transformed["bboxes"]  # Resized bounding boxes
            masks = transformed["masks"][0]  # Resized mask
            category_ids = transformed["category_ids"]
        else:
            image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
            masks = torch.tensor(masks, dtype=torch.uint8)

        # Convert bounding boxes and category IDs to tensors
        bboxes = torch.tensor(bbox, dtype=torch.float32)
        if bboxes.numel() == 0:
            # Keep a consistent (N, 4) layout for images without annotations
            bboxes = bboxes.reshape(0, 4)
        category_ids = torch.tensor(category_ids, dtype=torch.int64)

        return {
            "image": image,
            "bboxes": bboxes,
            "masks": masks,
            "category_ids": category_ids,
            "original_size": original_size,
            "target_size": self.target_size,
        }

    def _getmask(self, image_id, image):
        """Build a uint8 label map with the same height and width as ``image``.

        Categories are painted in the order turtle (1), flipper (2), head (3),
        so where annotations overlap the later category wins.
        """
        categories = {"turtle": 1, "flipper": 2, "head": 3}
        mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)

        for category_id in categories.values():
            ann_ids = self.coco.getAnnIds(
                imgIds=image_id, catIds=category_id, iscrowd=None
            )

            # Union of every annotation of this category. A boolean OR is used
            # rather than a uint8 sum, which would wrap back to 0 once 256
            # annotations overlap on a pixel.
            covered = np.zeros(mask.shape, dtype=bool)
            for ann in self.coco.loadAnns(ann_ids):
                covered |= self.coco.annToMask(ann).astype(bool)

            # The mask value is the category id itself
            mask[covered] = category_id

        return mask

Next, we will load the train, validation and test splits saved in models/dataset.pth into this notebook.

In [None]:
import torch
from torch.utils.data import DataLoader

# Load the pre-split data saved by dataVisualisation.ipynb.
# The file stores whole dataset objects rather than plain tensors, so it has
# to be unpickled with weights_only=False (the default is True from
# PyTorch 2.6 on; the argument exists since PyTorch 1.13).
# SECURITY: unpickling can execute arbitrary code - only load a file that you
# created yourself.
loaded_dataset = torch.load('models/dataset.pth', weights_only=False)

# Split as we need
train_dataset = loaded_dataset['train']
val_dataset = loaded_dataset['val']
test_dataset = loaded_dataset['test']


def collate_image_mask(batch):
    """Turn a list of dataset samples into an ``(images, masks)`` tensor pair.

    The dataset yields dicts; only the ``image`` and ``masks`` entries are
    kept because that is what the training and evaluation loops unpack. The
    bounding boxes are dropped, which also avoids stacking tensors whose
    length differs from sample to sample.
    """
    images = torch.stack([sample["image"] for sample in batch])
    masks = torch.stack([torch.as_tensor(sample["masks"]) for sample in batch])
    return images, masks


# Create dataloaders
# The per-epoch visualisation shows up to 3 samples of one training batch, so
# keep the batch size at 3 or above to see all three panels.
BATCH_SIZE = 8

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training samples every epoch
    collate_fn=collate_image_mask,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_image_mask,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_image_mask,
)

In [None]:
def compute_IoU(outputs, masks, target_class):
    """Return the intersection-over-union of one class, or NaN if it is absent.

    Args:
        outputs: Per-class scores with the class axis at dim 1; the predicted
            label of each pixel is the argmax over that axis.
        masks: Ground-truth labels, compared element-wise with the predictions.
        target_class: Label value to evaluate.

    Returns:
        ``intersection / union`` as a float, or ``nan`` when the class occurs
        in neither the prediction nor the ground truth.
    """
    predicted_hit = torch.argmax(outputs, dim=1) == target_class
    truth_hit = masks == target_class

    union = torch.logical_or(predicted_hit, truth_hit).sum().item()
    if union == 0:
        # Class absent everywhere: the ratio is undefined
        return float('nan')

    intersection = torch.logical_and(predicted_hit, truth_hit).sum().item()
    return intersection / union
    
num_classes = 4  # background + 3 classes
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): `smp` is expected to be segmentation_models_pytorch; no import
# for it is visible in this notebook, so make sure it is imported before this
# cell runs.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights="imagenet",
    classes=num_classes,
    # Return raw logits. The outputs are fed to CrossEntropyLoss, which applies
    # log-softmax internally; a softmax head on top of that would squash the
    # scores twice and weaken the gradients. The argmax used for predictions
    # and IoU is the same with or without softmax.
    activation=None,
)
model = model.to(device)


In [None]:
# chatgpt generated function
def visualize_prediction(image, mask, prediction, figsize=(15,5)):
    """Show an image, its ground-truth mask and the predicted mask side by side.

    Args:
        image: Normalised image, as a tensor or numpy array. A tensor whose
            first axis has length 3 is moved to channels-last for plotting.
        mask: Ground-truth label map (tensor or numpy array).
        prediction: Predicted label map (tensor or numpy array).
        figsize: Size of the matplotlib figure.
    """
    # One RGB colour per label value 0..3; any other value stays black
    palette = [(0,0,0), (1,0,0), (0,1,0), (0,0,1)]

    def _colorize(label_map):
        """Paint each label of ``label_map`` with its palette colour."""
        canvas = np.zeros((*label_map.shape, 3))
        for label, rgb in enumerate(palette):
            canvas[label_map == label] = rgb
        return canvas

    # Bring everything onto the CPU as numpy arrays
    if isinstance(image, torch.Tensor):
        image = image.cpu().numpy()
        if image.shape[0] == 3:
            image = image.transpose(1, 2, 0)
    if isinstance(mask, torch.Tensor):
        mask = mask.cpu().numpy()
    if isinstance(prediction, torch.Tensor):
        prediction = prediction.cpu().numpy()

    # Undo the normalisation so the picture is displayed in its [0, 1] range
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    image = np.clip(image * channel_std + channel_mean, 0, 1)

    panels = (
        ('Original Image', image),
        ('Ground Truth Mask', _colorize(mask)),
        ('Model Prediction', _colorize(prediction)),
    )

    plt.figure(figsize=figsize)
    for position, (title, picture) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(picture)
        plt.title(title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

#### Training

In [None]:
# The loss is reached through `torch.nn` because no bare `nn` name is imported
# in the cells of this notebook.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)


# Define class labels for each category
turtle_class = 1
flipper_class = 2
head_class = 3

num_epochs = 10
model.train()

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    epoch_loss = 0

    # Training loop
    for batch_idx, (images, masks) in enumerate(train_loader):
        images = images.to(device)
        masks = masks.to(device).long()  # CrossEntropyLoss needs int64 targets

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        # Print every 5 batches
        if batch_idx % 5 == 0:
            print(f"Batch {batch_idx}/{len(train_loader)} - Loss: {loss.item():.4f}")

    # Average epoch loss
    avg_epoch_loss = epoch_loss / len(train_loader)
    print(f"Epoch {epoch+1} Average Loss: {avg_epoch_loss:.4f}")

    # Validation step
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for images, masks in val_loader:
            images, masks = images.to(device), masks.to(device).long()
            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    print(f"Validation Loss after Epoch {epoch+1}: {avg_val_loss:.4f}")

    # Compute mIoU on the test set
    turtle_IoUs, flipper_IoUs, head_IoUs = [], [], []

    with torch.no_grad():
        for images, masks in test_loader:
            images, masks = images.to(device), masks.to(device).long()
            outputs = model(images)

            # Score every image on its own so that each one contributes a
            # single IoU value per category to the mean.
            for sample_output, sample_mask in zip(outputs, masks):
                sample_output = sample_output.unsqueeze(0)  # restore batch axis
                sample_mask = sample_mask.unsqueeze(0)
                turtle_IoUs.append(compute_IoU(sample_output, sample_mask, turtle_class))
                flipper_IoUs.append(compute_IoU(sample_output, sample_mask, flipper_class))
                head_IoUs.append(compute_IoU(sample_output, sample_mask, head_class))

    # nanmean skips images in which a category is absent (IoU is NaN there)
    turtle_mIoU = np.nanmean(turtle_IoUs)
    flipper_mIoU = np.nanmean(flipper_IoUs)
    head_mIoU = np.nanmean(head_IoUs)

    print(f"Turtle (Carapace) mIoU on Test Set after Epoch {epoch+1}: {turtle_mIoU:.4f}")
    print(f"Flippers mIoU on Test Set after Epoch {epoch+1}: {flipper_mIoU:.4f}")
    print(f"Head mIoU on Test Set after Epoch {epoch+1}: {head_mIoU:.4f}")

    # Visualize last few predictions after each epoch
    with torch.no_grad():
        # Get a single batch from the train_loader
        batch_images, batch_masks = next(iter(train_loader))

        # Select the last 3 images in the batch (fewer if the batch is smaller)
        images = batch_images[-3:].to(device)
        masks = batch_masks[-3:]

        # Generate model predictions
        outputs = model(images)
        predictions = torch.argmax(outputs, dim=1)

        # Visualize predictions; iterating over the tensors themselves avoids
        # an IndexError when the batch holds fewer than 3 samples.
        print(f"\nPredictions after Epoch {epoch+1}:")
        for image, mask, prediction in zip(images, masks, predictions):
            visualize_prediction(image, mask, prediction)

    model.train()  # Set back to training mode
