<a href="https://colab.research.google.com/github/aledima00/Project4_SemSeg_AML2024/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 4 - Semantic Segmentation

First let's download dataset, that is already split in "Train", "Test" and "Val" modules

In [124]:
import os
import glob
from skimage.io import imread
import logging
from enum import Enum
import gdown

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms

from PIL import Image

import numpy as np

General Configuration:

In [125]:
DBG = False                    # set to True for debug mode (lighter execution + dbg prints)
CONFIG_DATASET = True         # set to True to download and config all dataset resources
CONFIG_DEEPLABV2 = True       # set to True to download and config all DeepLabv2 resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
TRAIN_PATH = "Train"          # path of the train folder
VAL_PATH = "Val"              # path of the val folder
TEST_PATH = "Test"            # path of the test folder
DEEPLABV2_WEIGHTS_PATH = "deeplabv2-pretrain-weights.pth"  # path of the deeplabv2 folder

Using device: cuda


download datasets:

In [126]:
def config_generic_dataset(DS_PATHNAME,URL):
  !rm -rf {DS_PATHNAME}
  ZIP_PATH = DS_PATHNAME + ".zip"
  !rm {ZIP_PATH}
  !wget -O {ZIP_PATH} {URL}
  !unzip {ZIP_PATH} | tail -n 3
  !rm {ZIP_PATH}

def config_train_dataset():
  config_generic_dataset(TRAIN_PATH, "https://zenodo.org/records/5706578/files/Train.zip?download=1")
def config_val_dataset():
  config_generic_dataset(VAL_PATH, "https://zenodo.org/records/5706578/files/Val.zip?download=1")
def config_test_dataset():
  config_generic_dataset(TEST_PATH, "https://zenodo.org/records/5706578/files/Test.zip?download=1")

def config_all_dataset(*,force=False):
  print("Dowloading and Configuring Dataset")
  if force or (not os.path.exists(TRAIN_PATH)):
    config_train_dataset()
  if force or (not os.path.exists(VAL_PATH)):
    config_val_dataset()
  if force or (not os.path.exists(TEST_PATH)):
    config_test_dataset()

download and configure deeplabv2 model library (_with R101 backbone_) and the pretrain weights:

In [127]:
def config_deeplabv2_model():
  print("Dowloading and Configuring DeepLabv2 Model")
  import sys
  import gdown
  !rm -rf "MLDL2024_project1"
  !git clone https://github.com/Gabrysse/MLDL2024_project1.git
  sys.path.append("/content/MLDL2024_project1/")
  gdown.download("https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v", DEEPLABV2_WEIGHTS_PATH, quiet=False)


In [128]:
if CONFIG_DATASET:
  config_all_dataset()
if CONFIG_DEEPLABV2:
  config_deeplabv2_model()

Dowloading and Configuring Dataset
Dowloading and Configuring DeepLabv2 Model
Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.
remote: Counting objects: 100% (18/18), done.
remote: Compressing objects: 100% (14/14), done.
remote: Total 34 (delta 8), reused 4 (delta 4), pack-reused 16 (from 1)
Receiving objects: 100% (34/34), 12.06 KiB | 2.41 MiB/s, done.
Resolving deltas: 100% (8/8), done.


Downloading...
From (original): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v
From (redirected): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v&confirm=t&uuid=e6a1978d-7609-4759-89a2-ba441fb922e4
To: /content/deeplabv2-pretrain-weights.pth
100%|██████████| 177M/177M [00:04<00:00, 36.3MB/s]


In [129]:
from colorama import Fore, Back, Style
def dbgp(name,value):
  """ Debug print function """
  if DBG:
    print(f"{Fore.BLACK}{Back.GREEN}{Style.BRIGHT}{name}:\t{value}{Fore.RESET}{Back.RESET}{Style.RESET_ALL}")

## Data Preparation

Create Dataset class and filter urban pictures...

In [146]:
NUM_CLASSES = 8
BATCH_SIZE = 2 if DBG else 64
LR = 0.001           # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 4e-5  # Regularization, you can keep this at the default
NUM_EPOCHS = 20      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75, 150] # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

In [147]:
# Define transforms for training phase
train_image_transform = transforms.Compose([
    transforms.Resize(256),       # Resizes short size of the PIL image to 256
    transforms.CenterCrop(224),   # Crops a central square patch of the image
                                  # 224 because torchvision's AlexNet needs a 224x224 input!
                                  # Remember this when applying different transformations, otherwise you get an error
    transforms.ToTensor(),        # Turn PIL Image to torch.Tensor
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))  # Normalize as per ImageNet stats
])

# Define transforms for the evaluation phase
test_image_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))  # Normalize as per ImageNet stats
])

# Masks should not be normalized or resized with interpolation
target_transform = transforms.Compose([
    transforms.Resize((256, 256), interpolation=Image.NEAREST),
    transforms.CenterCrop(224),
    transforms.Lambda(lambda mask: torch.tensor(np.array(mask), dtype=torch.uint8)),
])


In [148]:
# taken from official repo of LoveDA
COLOR_MAP = {
    0:"IGNORE",
    1:"Background",
    2:"Building",
    3:"Road",
    4:"Water",
    5:"Barren",
    6:"Forest",
    7:"Agricultural"
}

In [149]:
def pil_loader(path,*,format:str="RGB"):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert(format)

class DataType(Enum):
  RURAL = 0
  URBAN = 1

class LoveDA(Dataset):
  def __init__(self, basedir, data_type:DataType, transforms=None, target_transform=None):
    #super(LoveDA, self).__init__(basedir, transforms, target_transform) # should we do this??
    if data_type == DataType.RURAL:
        self.base_path = os.path.join(basedir, "Rural")
    else: #data_type == DataType.URBAN:
        self.base_path = os.path.join(basedir, "Urban")


    # list of integers that identifies paths relative to both images_png and masks_png
    self.int_pathrefs = os.listdir(os.path.join(self.base_path, "images_png"))
    self.int_pathrefs = list(int(st.split(".")[0]) for st in self.int_pathrefs)

    # DEBUG PRINT
    if DBG:
      self.int_pathrefs = self.int_pathrefs[:15] # limit the dataset for debug
    dbgp("int_pathrefs", self.int_pathrefs)

    self.transforms = transforms
    self.target_transform = target_transform

  def __getitem__(self, idx):
    assert idx < len(self), 'Index out of range'
    image_path = os.path.join(self.base_path, "images_png", str(self.int_pathrefs[idx]) + ".png")
    mask_path = os.path.join(self.base_path, "masks_png", str(self.int_pathrefs[idx]) + ".png")
    image = pil_loader(image_path,format="RGB")
    mask = pil_loader(mask_path,format="L")

    if self.transforms is not None:
      image = self.transforms(image)
    if self.target_transform is not None:
      mask = self.target_transform(mask) # should we apply transformation on mask ??

    # DEBUG PRINT
    dbgp("post-transform image", image)
    dbgp("post-transform mask", mask)

    return image, mask

  def __len__(self):
    return len(self.int_pathrefs)


In [150]:
# Urban Datasets (train, val, test)
urban_train = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=train_image_transform, target_transform=target_transform)
urban_val = LoveDA(VAL_PATH, DataType.URBAN, transforms=test_image_transform, target_transform=target_transform)
urban_test = LoveDA(TEST_PATH, DataType.URBAN, transforms=test_image_transform, target_transform=target_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 2 if DBG else 4
urban_train_dataloader = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
urban_val_dataloader = DataLoader(urban_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
urban_test_dataloader = DataLoader(urban_test, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

Get DeepLabv2 model with pretrain weights:

In [151]:
from MLDL2024_project1.models.deeplabv2 import deeplabv2

model = deeplabv2.get_deeplab_v2(num_classes=NUM_CLASSES,pretrain=True,pretrain_model_path=DEEPLABV2_WEIGHTS_PATH)

Deeplab pretraining loading...


## Optimizer, Loss, ecc.

In [152]:
# enable validation during training
validate = True

model.train(True)
model.multi_level = False # ask in class
for params in model.get_1x_lr_params_no_scale():
  params.requires_grad = False # no training in Backbone
for params in model.get_10x_lr_params():
  params.requires_grad = True # training in classifiers


model = model.to(DEVICE) # switch to GPU

#Loss (as said in DeepLabv2 docs)
loss_fn = nn.CrossEntropyLoss()

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)


## Training Loop

In [153]:
import warnings
from torch.backends import cudnn
from icecream import ic
from colorama import Fore,Back,Style
warnings.filterwarnings('ignore')

train_iter = 0
val_iter = 0

trainSamples = len(urban_train) - (len(urban_train) % BATCH_SIZE)
val_samples = len(urban_val)
iterPerEpoch = len(urban_train_dataloader)
val_steps = len(urban_val_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1
model_checkpoint = "model" #name
model.train(True)

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    IoU = 0

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        train_iter += 1
        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)

        output_logits,_,_ = model(inputs)

        # compute loss
        loss = loss_fn(output_logits, labels)

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        dbgp("predicted", predicted)
        dbgp("labels", labels)

        intersection = torch.logical_and(labels, predicted)
        union = torch.logical_or(labels, predicted)

        dbgp("intersection", intersection)
        dbgp("union", union)

        IoU += torch.sum(intersection).item() / torch.sum(union).item()

        step_loss = loss.data.item()
        epoch_loss += step_loss
        if train_iter % 5 == 0:
          print(Fore.WHITE + Style.DIM + 'Train: Epoch = {} | Step = {} | Step Loss = {:.3f}'.format(epoch + 1, train_iter, step_loss))
    avg_loss = epoch_loss/iterPerEpoch
    avg_IoU = (IoU / trainSamples)
    print(Fore.GREEN + Style.BRIGHT + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, avg_IoU))
    """
    #train_logger.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    if validate:
        if (epoch+1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)

                val_loss_step = val_loss.data.item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                #val_logger.add_step_data(val_iter, numCorr, val_loss_step)

            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_steps

            print(Fore.GREEN + 'Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
            if val_accuracy > min_accuracy:
                print("[||| NEW BEST on val||||]")
                save_path_model = os.path.join(model_folder, model_checkpoint)
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
"""
    optim_scheduler.step()


Train: Epoch = 1 | Step = 5 | Step Loss = 4.961
Train: Epoch = 1 | Step = 10 | Step Loss = 4.614
Train: Epoch = 1 | Step = 15 | Step Loss = 2.841
Train: Epoch = 1 | mean Loss = 4.386 | mean-IoU = 0.015
Train: Epoch = 2 | Step = 20 | Step Loss = 2.098
Train: Epoch = 2 | Step = 25 | Step Loss = 2.093
Train: Epoch = 2 | Step = 30 | Step Loss = 1.674
Train: Epoch = 2 | Step = 35 | Step Loss = 1.541
Train: Epoch = 2 | mean Loss = 1.898 | mean-IoU = 0.015
Train: Epoch = 3 | Step = 40 | Step Loss = 1.443
Train: Epoch = 3 | Step = 45 | Step Loss = 1.258
Train: Epoch = 3 | Step = 50 | Step Loss = 1.288
Train: Epoch = 3 | mean Loss = 1.318 | mean-IoU = 0.016
Train: Epoch = 4 | Step = 55 | Step Loss = 1.080
Train: Epoch = 4 | Step = 60 | Step Loss = 1.049
Train: Epoch = 4 | Step = 65 | Step Loss = 1.100
Train: Epoch = 4 | Step = 70 | Step Loss = 1.210
Train: Epoch = 4 | mean Loss = 1.119 | mean-IoU = 0.016
Train: Epoch = 5 | Step = 75 | Step Loss = 0.957
Train: Epoch = 5 | Step = 80 | Step Loss =