<a href="https://colab.research.google.com/github/aledima00/Project4_SemSeg_AML2024/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 4 - Semantic Segmentation

First let's download dataset, that is already split in "Train", "Test" and "Val" modules

In [1]:
!pip install skimage
!pip install icecream

Collecting skimage
  Downloading skimage-0.0.tar.gz (757 bytes)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Collecting icecream
  Downloading icecream-2.1.3-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting colorama>=0.3.9 (from icecream)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting executing>=0.3.1 (from icecream)
  Downloading executing-2.1.0-py2.py3-none-any.whl.metadata (

In [10]:
import os
import glob
from icecream import ic
from skimage.io import imread
import logging
from enum import Enum
import gdown

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms

General Configuration:

In [11]:
DBG = True                    # set to True for debug mode (lighter execution + dbg prints)
CONFIG_DATASET = True         # set to True to download and config all dataset resources
CONFIG_DEEPLABV2 = True       # set to True to download and config all DeepLabv2 resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
TRAIN_PATH = "Train"          # path of the train folder
VAL_PATH = "Val"              # path of the val folder
TEST_PATH = "Test"            # path of the test folder
DEEPLABV2_WEIGHTS_PATH = "deeplabv2-pretrain-weights.pth"  # path of the deeplabv2 folder

Using device: cpu


download datasets:

In [12]:
def config_generic_dataset(DS_PATHNAME,URL):
  !rm -rf {DS_PATHNAME}
  ZIP_PATH = DS_PATHNAME + ".zip"
  !rm {ZIP_PATH}
  !wget -O {ZIP_PATH} {URL}
  !unzip {ZIP_PATH} | tail -n 3
  !rm {ZIP_PATH}

def config_train_dataset():
  config_generic_dataset(TRAIN_PATH, "https://zenodo.org/records/5706578/files/Train.zip?download=1")
def config_val_dataset():
  config_generic_dataset(VAL_PATH, "https://zenodo.org/records/5706578/files/Val.zip?download=1")
def config_test_dataset():
  config_generic_dataset(TEST_PATH, "https://zenodo.org/records/5706578/files/Test.zip?download=1")

def config_all_dataset(*,force=False):
  print("Dowloading and Configuring Dataset")
  if force or (not os.path.exists(TRAIN_PATH)):
    config_train_dataset()
  if force or (not os.path.exists(VAL_PATH)):
    config_val_dataset()
  if force or (not os.path.exists(TEST_PATH)):
    config_test_dataset()

download and configure deeplabv2 model library (_with R101 backbone_) and the pretrain weights:

In [13]:
def config_deeplabv2_model():
  print("Dowloading and Configuring DeepLabv2 Model")
  import sys
  import gdown
  !rm -rf "MLDL2024_project1"
  !git clone https://github.com/Gabrysse/MLDL2024_project1.git
  sys.path.append("/content/MLDL2024_project1/")
  gdown.download("https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v", DEEPLABV2_WEIGHTS_PATH, quiet=False)


In [14]:
if CONFIG_DATASET:
  config_all_dataset()
if CONFIG_DEEPLABV2:
  config_deeplabv2_model()

Dowloading and Configuring Dataset
Dowloading and Configuring DeepLabv2 Model
Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 34 (delta 8), reused 4 (delta 4), pack-reused 16 (from 1)[K
Receiving objects: 100% (34/34), 12.06 KiB | 12.06 MiB/s, done.
Resolving deltas: 100% (8/8), done.


Downloading...
From (original): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v
From (redirected): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v&confirm=t&uuid=79442e94-9b2f-4dad-a7d7-8cc5d51357b5
To: /content/deeplabv2-pretrain-weights.pth
100%|██████████| 177M/177M [00:00<00:00, 204MB/s]


## Data Preparation

Create Dataset class and filter urban pictures...

In [15]:
NUM_CLASSES = 7
BATCH_SIZE = 2 if DBG else 128
LR = 0.001           # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 4e-5  # Regularization, you can keep this at the default
NUM_EPOCHS = 20      # Total number of training epochs (iterations over dataset)
STEP_SIZE = [25, 75, 150] # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

In [16]:
# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))  # Normalize as per ImageNet stats
])
# Define transforms for the evaluation phase
test_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))  # Normalize as per ImageNet stats
])

In [17]:
# taken from official repo of LoveDA
COLOR_MAP = {
    0:"IGNORE",
    1:"Background",
    2:"Building",
    3:"Road",
    4:"Water",
    5:"Barren",
    6:"Forest",
    7:"Agricultural"
}

In [64]:
from PIL import Image

def pil_loader(path,*,convert=True):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

class DataType(Enum):
  RURAL = 0
  URBAN = 1

class LoveDA(Dataset):
  def __init__(self, basedir, data_type:DataType, transforms=None, target_transform=None):
    #super(LoveDA, self).__init__(basedir, transforms, target_transform) # should we do this??
    if data_type == DataType.RURAL:
        self.base_path = os.path.join(basedir, "Rural")
    else: #data_type == DataType.URBAN:
        self.base_path = os.path.join(basedir, "Urban")


    # list of integers that identifies paths relative to both images_png and masks_png
    self.int_pathrefs = os.listdir(os.path.join(self.base_path, "images_png"))
    self.int_pathrefs = list(int(st.split(".")[0]) for st in self.int_pathrefs)

    # -------------------------- DEBUG --------------------------
    if DBG:
      self.int_pathrefs = self.int_pathrefs[:15] # limit the dataset for debug
      ic(self.int_pathrefs)
    # -------------------------- DEBUG --------------------------

    self.transforms = transforms
    self.target_transform = target_transform

  def __getitem__(self, idx):
    assert idx < len(self), 'Index out of range'
    image_path = os.path.join(self.base_path, "images_png", str(self.int_pathrefs[idx]) + ".png")
    mask_path = os.path.join(self.base_path, "masks_png", str(self.int_pathrefs[idx]) + ".png")
    image = pil_loader(image_path,convert=True)
    mask = pil_loader(mask_path,convert=False)

    if self.transforms is not None:
      image = self.transforms(image)
    if self.target_transform is not None:
      mask = self.target_transforms(mask) # should we apply transformation on mask ??

    #image,mask = torch.tensor(image),torch.tensor(mask) # to torch tensors
    #mask = torch.tensor(mask, dtype=torch.uint8)

    # -------------------------- DEBUG --------------------------
    if DBG:
      print("post-transform:")
      ic(image)
      ic(mask)
    # -------------------------- DEBUG --------------------------

    return image, mask

  def __len__(self):
    return len(self.int_pathrefs)


In [65]:
# Urban Datasets (train, val, test)
urban_train = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=train_transform)
urban_val = LoveDA(VAL_PATH, DataType.URBAN, transforms=test_transform)
urban_test = LoveDA(TEST_PATH, DataType.URBAN, transforms=test_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 1 if DBG else 4
urban_train_dataloader = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
urban_val_dataloader = DataLoader(urban_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
urban_test_dataloader = DataLoader(urban_test, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

ic| self.int_pathrefs: [1634,
                        2130,
                        2460,
                        1812,
                        1459,
                        1811,
                        2243,
                        2187,
                        1956,
                        1746,
                        1587,
                        2389,
                        1570,
                        2314,
                        2104]
ic| self.int_pathrefs: [3607,
                        4125,
                        3894,
                        3966,
                        3724,
                        3602,
                        3579,
                        3772,
                        3778,
                        3806,
                        3891,
                        3898,
                        3957,
                        4156,
                        3590]
ic| self.int_pathrefs: [5583,
                        5626,
                        5239,
          

Get DeepLabv2 model with pretrain weights:

In [66]:
from MLDL2024_project1.models.deeplabv2 import deeplabv2

model = deeplabv2.get_deeplab_v2(num_classes=7,pretrain=True,pretrain_model_path=DEEPLABV2_WEIGHTS_PATH)

Deeplab pretraining loading...


## Optimizer, Loss, ecc.

In [67]:
# enable validation during training
validate = True

model.train(True)
model.multi_level = False # ask in class
for params in model.get_1x_lr_params_no_scale():
  params.requires_grad = False # no training in Backbone
for params in model.get_10x_lr_params():
  params.requires_grad = True # training in classifiers


model = model.to(DEVICE) # switch to GPU

#Loss (as said in DeepLabv2 docs)
loss_fn = nn.CrossEntropyLoss()

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)


## Training Loop

In [68]:
for inputs, targets in urban_train_dataloader:
  print(inputs.shape)
  print(targets.shape)
  break

post-transform:


ic| image: tensor([[[-0.9877, -0.9877, -0.9363,  ..., -1.2274, -1.2103, -1.2959],
                    [-0.7650, -,0.7308 -0.8164,  ..., -1.3644, -1.3473, -1.3644],
                    [-0.7993, -0.7479, -0.6965,  ..., -1.3644, -1.4158, -1.4843],
                    ...,
                    [-1.3644, -1.4329, -1.4158,  ..., -1.3987, -1.4158, -1.4158],
                    [-1.3130, -1.3987, -1.3987,  ..., -1.3130, -1.3987, -1.4500],
                    [-1.3815, -1.4158, -1.3987,  ..., -1.2445, -1.3130, -1.4158]],
           
                   [[-0.7752, -0.7402, -0.7052,  ..., -0.8627, -0.8627, -0.9678],
                    [-0.5126, -0.5301, -0.6527,  ..., -0.9853, -0.9678, -1.0553],
                    [-0.5651, -0.5301, -0.4776,  ..., -0.9503, -1.0028, -1.1078],
                    ...,
                    [-1.0728, -1.1253, -1.1253,  ..., -1.1253, -1.1779, -1.1779],
                    [-1.0378, -1.0903, -1.1253,  ..., -1.0553, -1.1429, -1.1779],
                    [-1.1078, -1.17

post-transform:


ic| image: tensor([[[-0.8507, -0.8164, -0.8507,  ..., -1.4500, -1.4672, -1.4500],
                    [-0.8164, -0.7822, -0.7993,  ..., -1.4329, -1.4329, -1.4158],
                    [-0.9020, -0.8678, -0.8164,  ..., -1.4329, -1.4158, -1.3987],
                    ...,
                    [ 0.3309,  0.7419,  0.2967,  ..., -1.1247, -1.0904, -1.1075],
                    [ 0.1939,  0.7077,  0.5364,  ..., -1.2617, -1.1932, -1.1589],
                    [-0.6452, -0.2171,  0.2967,  ..., -1.2788, -1.2445, -1.1760]],
           
                   [[-0.7052, -0.6877, -0.7577,  ..., -1.1429, -1.1604, -1.2129],
                    [-0.7052, -0.6702, -0.7052,  ..., -1.1429, -1.1779, -1.1779],
                    [-0.7752, -0.7227, -0.6702,  ..., -1.1429, -1.1604, -1.1779],
                    ...,
                    [ 0.4153,  0.8704,  0.4503,  ..., -0.8452, -0.8102, -0.8277],
                    [ 0.2577,  0.8354,  0.7654,  ..., -0.9153, -0.8803, -0.8452],
                    [-0.5301, -0.12

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 211, in collate
    return [
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 212, in <listcomp>
    collate(samples, collate_fn_map=collate_fn_map)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 240, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'PIL.Image.Image'>


post-transform:


ic

In [None]:
import warnings
from torch.backends import cudnn
from icecream import ic
from colorama import Fore,Back,Style
warnings.filterwarnings('ignore')

train_iter = 0
val_iter = 0

trainSamples = len(urban_train) - (len(urban_train) % BATCH_SIZE)
val_samples = len(urban_val)
iterPerEpoch = len(urban_train_dataloader)
val_steps = len(urban_val_dataloader)

cudnn.benchmark
model_checkpoint = "model" #name

model.train(True)
for epoch in range(NUM_EPOCHS):
    epoch_loss = 0
    IoU = 0

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        train_iter += 1
        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.to(device=DEVICE, dtype=torch.int64)

        output_logits,_,_ = model(inputs)

        # compute loss
        loss = loss_fn(output_logits, labels)

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)
        print(labels)
        print(predicted)

        intersection = torch.logical_and(labels, predicted)
        union = torch.logical_or(labels, predicted)
        print(intersection)
        print(union)
        IoU += torch.sum(intersection).item() / torch.sum(union).item()

        step_loss = loss.data.item()
        epoch_loss += step_loss
    avg_loss = epoch_loss/iterPerEpoch
    print(f"IoU:{IoU}, samples:{trainSamples}")
    avg_IoU = (IoU / trainSamples)
    print(Fore.GREEN + 'Val: Epoch = {} | Loss {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, avg_IoU))
    """
    #train_logger.add_epoch_data(epoch+1, trainAccuracy, avg_loss)

    if validate:
        if (epoch+1) % 1 == 0:
            model.train(False)
            val_loss_epoch = 0
            numCorr = 0
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                inputVariable = inputs.permute(1, 0, 2, 3, 4).to(DEVICE)
                labelVariable = targets.to(DEVICE)

                output_label, _ = model(inputVariable)
                val_loss = loss_fn(output_label, labelVariable)

                val_loss_step = val_loss.data.item()
                val_loss_epoch += val_loss_step
                _, predicted = torch.max(output_label.data, 1)
                numCorr += torch.sum(predicted == labelVariable.data).data.item()
                #val_logger.add_step_data(val_iter, numCorr, val_loss_step)

            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_steps

            print(Fore.GREEN + 'Val: Epoch = {} | Loss {:.3f} | Accuracy = {:.3f}'.format(epoch + 1, avg_val_loss, val_accuracy))
            if val_accuracy > min_accuracy:
                print("[||| NEW BEST on val||||]")
                save_path_model = os.path.join(model_folder, model_checkpoint)
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy
"""
    optim_scheduler.step()


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-13-d1c997c7d697>", line 31, in __getitem__
    image = self.transforms(image)
  File "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/transforms.py", line 95, in __call__
    img = t(img)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/transforms.py", line 354, in forward
    return F.resize(img, self.size, self.interpolation, self.max_size, self.antialias)
  File "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/functional.py", line 465, in resize
    _, image_height, image_width = get_dimensions(img)
  File "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/functional.py", line 80, in get_dimensions
    return F_pil.get_dimensions(img)
  File "/usr/local/lib/python3.10/dist-packages/torchvision/transforms/_functional_pil.py", line 31, in get_dimensions
    raise TypeError(f"Unexpected type {type(img)}")
TypeError: Unexpected type <class 'numpy.ndarray'>
