<a href="https://colab.research.google.com/github/aledima00/Project4_SemSeg_AML2024/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 4 - Semantic Segmentation

First let's download dataset, that is already split in "Train", "Test" and "Val" modules

In [1]:
!pip install colorama | tail -n 1
!pip install albumentations | tail -n 1
!pip install yacs | tail -n 1
!pip install fvcore | tail -n 1
!pip install tqdm | tail -n 1

Successfully installed colorama-0.4.6
Successfully installed yacs-0.1.8
Successfully installed fvcore-0.1.5.post20221221 iopath-0.1.10 portalocker-3.1.1


Generic Imports: here we import all generic libraries required from now on; more specific libraries will be imported later.

In [2]:
# miscellaneous
import os
import glob
from enum import Enum
import gdown
import numpy as np

# torch basics
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# image management
import torchvision
from torchvision import transforms
import albumentations
from PIL import Image

# logging and printing
import logging
from colorama import Fore, Back, Style
from tqdm.auto import tqdm

None # suppress output

  check_for_updates()


General Configuration:

In [3]:
DBG = False                   # set to True for debug mode (lighter execution + dbg prints)
CONFIG_DATASET = True         # set to True to download and config all dataset resources
CONFIG_DEEPLABV2 = True       # set to True to download and config all DeepLabv2 resources
CONFIG_PIDNET = True          # set to True to download and config all PIDNET resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
TRAIN_PATH = "Train"          # path of the train folder
VAL_PATH = "Val"              # path of the val folder
TEST_PATH = "Test"            # path of the test folder

DEEPLABV2_PRETRAIN_WEIGHTS_PATH = "deeplabv2-pretrain-weights.pth"  # path of the deeplabv2 pretrain weights
PIDNET_PRETRAIN_WEIGHTS_PATH = "PIDNet/pretrained_models/imagenet/imagenet.pth.tar" # path of the PIDNet pretrain weights

MODELS_FOLDER = "saved_models"
DEEPLABV2_FINAL_WEIGHTS_PATH = MODELS_FOLDER + "/deeplab_v2.pth"
PIDNET_FINAL_WEIGHTS_PATH = MODELS_FOLDER + "/pidnet.pth"
PIDNET_DACS_FINAL_WEIGHTS_PATH = MODELS_FOLDER + "/pidnet_DACS.pth"
if not os.path.isdir(MODELS_FOLDER):
    os.makedirs(MODELS_FOLDER)


Using device: cuda


function to download datasets:

In [4]:
def config_generic_dataset(DS_PATHNAME,URL):
  !rm -rf {DS_PATHNAME}
  ZIP_PATH = DS_PATHNAME + ".zip"
  !rm {ZIP_PATH}
  !wget -O {ZIP_PATH} {URL}
  !unzip {ZIP_PATH} | tail -n 3
  !rm {ZIP_PATH}

def config_train_dataset():
  config_generic_dataset(TRAIN_PATH, "https://zenodo.org/records/5706578/files/Train.zip?download=1")
def config_val_dataset():
  config_generic_dataset(VAL_PATH, "https://zenodo.org/records/5706578/files/Val.zip?download=1")
def config_test_dataset():
  config_generic_dataset(TEST_PATH, "https://zenodo.org/records/5706578/files/Test.zip?download=1")

def config_all_dataset(*,force=False):
  print("Dowloading and Configuring Dataset")
  if force or (not os.path.exists(TRAIN_PATH)):
    config_train_dataset()
  if force or (not os.path.exists(VAL_PATH)):
    config_val_dataset()
  if force or (not os.path.exists(TEST_PATH)):
    config_test_dataset()

function to download and configure deeplabv2 model library (_with R101 backbone_) and the pretrain weights:

In [5]:
def config_deeplabv2_model():
  print("Dowloading and Configuring DeepLabv2 Model")
  import sys
  import gdown
  !rm -rf "MLDL2024_project1"
  !git clone https://github.com/Gabrysse/MLDL2024_project1.git
  sys.path.append("/content/MLDL2024_project1/")
  gdown.download("https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v", DEEPLABV2_PRETRAIN_WEIGHTS_PATH, quiet=False)
  gdown.download("https://drive.google.com/uc?id=15eXpt8tqLK_mgReNP58Q37wFwnA4RF_n", DEEPLABV2_FINAL_WEIGHTS_PATH, quiet=False)


function to download and configure pidnet model library and the pretrain weights:

In [6]:
def config_pidnet():
  import sys
  import gdown
  print("Dowloading and Configuring PIDNET Model")
  !rm -rf "PIDNet"
  !git clone https://github.com/XuJiacong/PIDNet.git
  sys.path.append("/content/PIDNet/")
  gdown.download("https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-", PIDNET_PRETRAIN_WEIGHTS_PATH, quiet=False)
  gdown.download("https://drive.google.com/uc?id=1kwpTYYbqs4BNYsw12j9Zth81sOKbMzFU", PIDNET_FINAL_WEIGHTS_PATH, quiet=False)


In [7]:
if CONFIG_DATASET:
  config_all_dataset()
if CONFIG_DEEPLABV2:
  config_deeplabv2_model()
if CONFIG_PIDNET:
  config_pidnet()

Dowloading and Configuring Dataset
rm: cannot remove 'Train.zip': No such file or directory
--2025-01-07 20:31:47--  https://zenodo.org/records/5706578/files/Train.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.45.92, 188.185.43.25, 188.185.48.194, ...
Connecting to zenodo.org (zenodo.org)|188.185.45.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4021669263 (3.7G) [application/octet-stream]
Saving to: ‘Train.zip’


2025-01-07 20:37:18 (11.6 MB/s) - ‘Train.zip’ saved [4021669263/4021669263]

  inflating: Train/Urban/masks_png/2519.png  
  inflating: Train/Urban/masks_png/2520.png  
  inflating: Train/Urban/masks_png/2521.png  
rm: cannot remove 'Val.zip': No such file or directory
--2025-01-07 20:37:58--  https://zenodo.org/records/5706578/files/Val.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.45.92, 188.185.48.194, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, awaiti

Downloading...
From (original): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v
From (redirected): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v&confirm=t&uuid=70c49334-087f-4a76-896d-ac60b3154499
To: /content/deeplabv2-pretrain-weights.pth
100%|██████████| 177M/177M [00:05<00:00, 32.7MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=15eXpt8tqLK_mgReNP58Q37wFwnA4RF_n
From (redirected): https://drive.google.com/uc?id=15eXpt8tqLK_mgReNP58Q37wFwnA4RF_n&confirm=t&uuid=d05d26ce-68a7-4dd4-8a86-aa7fd3958777
To: /content/saved_models/deeplab_v2.pth
100%|██████████| 173M/173M [00:03<00:00, 56.2MB/s]


Dowloading and Configuring PIDNET Model
Cloning into 'PIDNet'...
remote: Enumerating objects: 386, done.[K
remote: Counting objects: 100% (193/193), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 386 (delta 131), reused 125 (delta 125), pack-reused 193 (from 1)[K
Receiving objects: 100% (386/386), 212.80 MiB | 16.10 MiB/s, done.
Resolving deltas: 100% (184/184), done.


Downloading...
From: https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-
To: /content/PIDNet/pretrained_models/imagenet/imagenet.pth.tar
100%|██████████| 38.1M/38.1M [00:00<00:00, 52.6MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1kwpTYYbqs4BNYsw12j9Zth81sOKbMzFU
From (redirected): https://drive.google.com/uc?id=1kwpTYYbqs4BNYsw12j9Zth81sOKbMzFU&confirm=t&uuid=80676456-ec25-4235-82c1-ab840a537e65
To: /content/saved_models/pidnet.pth
100%|██████████| 31.1M/31.1M [00:00<00:00, 42.3MB/s]


In [8]:
def dbgp(name,value):
  """ Debug print function """
  if DBG:
    print(f"{Fore.BLACK}{Back.GREEN}{Style.BRIGHT}{name}:\t{value}{Fore.RESET}{Back.RESET}{Style.RESET_ALL}")

## Data Preparation

Here we want to create the DataSet class and define the interfaces to use it.

First let's define some parameters about data:

In [9]:
NUM_CLASSES = 7
BATCH_SIZE = 2 if DBG else 16
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STDDEV = (0.229, 0.224, 0.225)

we define the transformation to apply on the dataset using albumentations in order to automatically distinguish which transformation must be applied on both images and masks and which others must be applied only on images:

In [10]:
import albumentations as ALB
from albumentations.pytorch import ToTensorV2

train_transform = ALB.Compose([
      ALB.Resize(256,256),
      ALB.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])

test_transform = ALB.Compose([
    ALB.Resize(256,256),
    ALB.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
    ToTensorV2()
])


then we define the `COLOR_MAP` used to interpretate the mask representation:

In [11]:
# taken from official repo of LoveDA
IGNORE_INDEX = -1
COLOR_MAP = {
    IGNORE_INDEX:"IGNORE",
    0:"Background",
    1:"Building",
    2:"Road",
    3:"Water",
    4:"Barren",
    5:"Forest",
    6:"Agricultural"
}
CLASSES = list(key for key in COLOR_MAP.keys() if COLOR_MAP[key] != "IGNORE")

finally, we can define the `LoveDA` class, used to create dataset instances; in addition, we define:
- `DataType` *enum* class, used to choose what dataset to instantiate;
- `pil_loader` function, to load images and masks

In [12]:
def pil_loader(path,*,format:str="RGB"):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert(format)

class DataType(Enum):
  RURAL = 0
  URBAN = 1

class LoveDA(Dataset):
  def __init__(self, basedir, data_type:DataType, transforms=None):
    #super(LoveDA, self).__init__(basedir, transforms, target_transform) # should we do this??
    if data_type == DataType.RURAL:
        self.base_path = os.path.join(basedir, "Rural")
    else: #data_type == DataType.URBAN:
        self.base_path = os.path.join(basedir, "Urban")


    # list of integers that identifies paths relative to both images_png and masks_png
    self.int_pathrefs = os.listdir(os.path.join(self.base_path, "images_png"))
    self.int_pathrefs = list(int(st.split(".")[0]) for st in self.int_pathrefs)

    # DEBUG PRINT
    if DBG:
      self.int_pathrefs = self.int_pathrefs[:15] # limit the dataset for debug

    self.transforms = transforms

  def __getitem__(self, idx):
    assert idx < len(self), 'Index out of range'
    image_path = os.path.join(self.base_path, "images_png", str(self.int_pathrefs[idx]) + ".png")
    mask_path = os.path.join(self.base_path, "masks_png", str(self.int_pathrefs[idx]) + ".png")
    image = pil_loader(image_path,format="RGB")
    mask = pil_loader(mask_path,format="L")

    # Convert PIL images to numpy arrays
    image = np.array(image)
    mask = np.array(mask, dtype=np.int8)

    if self.transforms is not None:
      augmented = self.transforms(image=image, mask=mask)
      image = augmented["image"]
      mask = augmented["mask"]

    mask -= 1

    return image, mask

  def __len__(self):
    return len(self.int_pathrefs)


we can therefore use the former classes to instantiate the dataset and dataloader objects that we're going to use later:

In [13]:
# Urban Datasets (train, val, test)
urban_train = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=train_transform)
urban_val = LoveDA(VAL_PATH, DataType.URBAN, transforms=test_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 2 if DBG else 4
urban_train_dataloader = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
urban_val_dataloader = DataLoader(urban_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)



## Utilities

Now we define some utilities useful for different purposes.

First we define the `IoUMeter` class, useful to compute IoU for classes and over the whole set:

In [14]:
EPSILON_IOU = 1e-7

class IoUMeter:
  total_intersection_per_class:dict
  total_union_per_class:dict

  def __init__(self):
    self.total_intersection_per_class = {cls: 0 for cls in CLASSES}
    self.total_union_per_class = {cls: 0 for cls in CLASSES}
  def zero(self):
    self.total_intersection_per_class = {cls: 0 for cls in CLASSES}
    self.total_union_per_class = {cls: 0 for cls in CLASSES}
  def addCouple(self,predicted,labels):
    for cls in CLASSES:
      true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
      pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

      # Calcola l'intersezione e l'unione per quella classe
      intersection = torch.logical_and(true_mask, pred_mask).sum().item()
      union = torch.logical_or(true_mask, pred_mask).sum().item()

      # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
      self.total_intersection_per_class[cls] += intersection
      self.total_union_per_class[cls] += union
  def getPerClass(self):
    return { cls: self.total_intersection_per_class[cls] / (self.total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
  def getTotal(self):
    class_IoUs = self.getPerClass()
    return sum(class_IoUs.values()) / NUM_CLASSES



now we define a plotter to shrink lines of code in loops:

In [15]:
class StatPlotMode(Enum):
  TRAINING = 0
  VALIDATION = 1
  TESTING = 2

  def __str__(self):
    return self.name

def stat_plot(avg_loss,iou_meter:IoUMeter,*,mode:StatPlotMode,epoch=None):
  mean_IoU = iou_meter.getTotal()
  if mode==StatPlotMode.TRAINING:
    print(Fore.GREEN + Style.NORMAL + 'TRAINING RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)
  elif mode==StatPlotMode.VALIDATION:
    print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)
  else:
    print(Fore.BLACK + Back.GREEN + Style.BRIGHT + 'TEST RESULTS on VALIDATION SET: mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(avg_loss, mean_IoU)+Style.RESET_ALL)

  if mode != StatPlotMode.TRAINING:
    # print IoU for each
    print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
    class_IoUs = iou_meter.getPerClass()
    for cls in CLASSES:
        print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)


flops, parameters and latency are computed for a model using the function `analyze_model`

In [16]:
from fvcore.nn import FlopCountAnalysis, parameter_count

import warnings
from torch.backends import cudnn

def analyze_model(model,*,iterations=100,batch_size=1):

  warnings.filterwarnings('ignore')
  cudnn.benchmark
  CUDA_LAUNCH_BLOCKING=1

  # Set the model to evaluation mode to avoid issues with batch normalization
  model.eval()
  model.training=False

  # test images batch (zeroes)
  height = 256
  width = 256
  image = torch.zeros((batch_size,3, height, width)).to(DEVICE)

  flops = FlopCountAnalysis(model, image)
  params = parameter_count(model)['']
  print(f"Model FLOPs: {flops.total()}")
  print(f"Model Parameters: {params}")

  # latency compute
  latency = list()
  start_event = torch.cuda.Event(enable_timing=True)
  end_event = torch.cuda.Event(enable_timing=True)
  for _ in tqdm(range(iterations)):
    start_event.record()  # Record start time on GPU
    _ = model(image)      # Run inference
    end_event.record()    # Record end time on GPU

    # Wait for GPU synchronization to ensure accurate timing
    torch.cuda.synchronize()

    # Calculate time elapsed in milliseconds
    latency.append(start_event.elapsed_time(end_event))
  avg_latency = sum(latency) / len(latency)
  print(f"Average latency: {avg_latency:.2f} ms")

the model weights can be saved and reloaded using the following functions:

In [17]:
def save_model_weights(model,save_path):
  print(f"Saving Model to {save_path}...")
  torch.save(model.state_dict(), save_path)
  print("Done!")

def load_model_weights(model, weights_path):
    if weights_path is None:
        return model
    weights_dict = torch.load(weights_path, map_location=torch.device(DEVICE))
    if 'state_dict' in weights_dict:
        weights_dict = weights_dict['state_dict']
    model.load_state_dict(weights_dict, strict = False)
    msg = 'Loaded {} parameters!'.format(len(weights_dict))
    return model


## DeepLabv2 on LoveDA (Urban)

In [18]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

Get DeepLabv2 model with pretrain weights:

In [19]:
from MLDL2024_project1.models.deeplabv2 import deeplabv2

model = deeplabv2.get_deeplab_v2(num_classes=NUM_CLASSES,pretrain=True,pretrain_model_path=DEEPLABV2_PRETRAIN_WEIGHTS_PATH)
model_name = "deeplab_v2"
weights_path = DEEPLABV2_FINAL_WEIGHTS_PATH

Deeplab pretraining loading...


  saved_state_dict = torch.load(pretrain_model_path)


### Optimizer, Loss, ecc.

In [20]:
# enable validation during training
validate = True

model = model.to(DEVICE) # switch to GPU

# Loss (as said in DeepLabv2 docs)
loss_fn = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

# Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

In [21]:
logging.getLogger("fvcore.nn.jit_analysis").setLevel(logging.ERROR)
analyze_model(model,iterations=1000,batch_size=1)

Model FLOPs: 47669164800
Model Parameters: 43016284


  0%|          | 0/1000 [00:00<?, ?it/s]

Average latency: 38.79 ms


### Training Loop

In [None]:
import warnings
from torch.backends import cudnn
warnings.filterwarnings('ignore')

train_iter = 0
iterPerEpoch = len(urban_train_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1

best_IoU = 0

for epoch in range(NUM_EPOCHS):
    model.train(True)
    epoch_loss = 0

    iou_meter = IoUMeter()

    for i, (inputs, targets) in enumerate(urban_train_dataloader):

        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)
        output_logits,_,_ = model(inputs)

        # compute loss
        loss = loss_fn(output_logits, labels)

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        iou_meter.addCouple(predicted,labels)

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    avg_loss = epoch_loss/iterPerEpoch
    stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TRAINING,epoch=epoch)

    if validate:
        iterPerVal = len(urban_val_dataloader)
        model.eval()
        val_loss = 0
        iou_meter = IoUMeter()
        for j, (inputs, targets) in enumerate(urban_val_dataloader):

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits = model(inputs)

            # compute loss
            loss = loss_fn(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            iou_meter.addCouple(predicted,labels)

            step_loss = loss.data.item()
            val_loss += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        mean_IoU = iou_meter.getTotal()
        avg_loss = val_loss/iterPerVal

        stat_plot(avg_loss,iou_meter,mode=StatPlotMode.VALIDATION,epoch=epoch)

        if mean_IoU > best_IoU:
            best_IoU = mean_IoU
            save_model_weights(model,weights_path)
        # END OF VALIDATION

    optim_scheduler.step()


### Testing
Now we test using the val dataset:

In [23]:
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
iterPerVal = len(urban_val_dataloader)
val_loss = 0
iou_meter = IoUMeter()
for j, (inputs, targets) in enumerate(urban_val_dataloader):

    # feeds in model
    inputs = inputs.to(DEVICE)
    labels = targets.long().to(device=DEVICE)
    output_logits = model(inputs)

    # compute loss
    loss = loss_fn(output_logits, labels)

    # compute the training accuracy
    _, predicted = torch.max(output_logits.data, 1)

    iou_meter.addCouple(predicted,labels)

    step_loss = loss.data.item()
    val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS

avg_loss = val_loss/iterPerVal
stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TESTING)

[30m[42m[1mTEST RESULTS on VALIDATION SET: mean Loss = 1.608 | mean-IoU = 0.384[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.304[0m
[37m[2mClass 1 (Building): IoU = 0.435[0m
[37m[2mClass 2 (Road): IoU = 0.472[0m
[37m[2mClass 3 (Water): IoU = 0.611[0m
[37m[2mClass 4 (Barren): IoU = 0.195[0m
[37m[2mClass 5 (Forest): IoU = 0.356[0m
[37m[2mClass 6 (Agricultural): IoU = 0.317[0m


## PIDNet & LoveDA

In [24]:
from PIDNet.models.pidnet import PIDNet
model = PIDNet(m=2, n=3, num_classes=NUM_CLASSES, planes=32, ppm_planes=96, head_planes=128, augment=True)
model = load_model_weights(model,PIDNET_PRETRAIN_WEIGHTS_PATH)
weights_path = PIDNET_FINAL_WEIGHTS_PATH

### Optimizer, Loss, ecc.

In [25]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

In [26]:
# enable validation during training
validate = True

model.train()
model = model.to(DEVICE) # switch to GPU

# loss functions
sem_criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)


In [27]:
analyze_model(model,iterations=1000,batch_size=1)

Model FLOPs: 1579333632
Model Parameters: 7717839


  0%|          | 0/1000 [00:00<?, ?it/s]

Average latency: 14.24 ms


### Training loop

In [None]:
import warnings

from torch.backends import cudnn
warnings.filterwarnings('ignore')

iterPerEpoch = len(urban_train_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1

best_IoU = 0

for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    iou_meter = IoUMeter()

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        optimizer_fn.zero_grad()

        # send inputs to gpu
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)

        # feeds in the model
        output_logits,_,_ = model(inputs)

        h, w = labels.size(1), labels.size(2)
        ph, pw = output_logits.size(2), output_logits.size(3)
        if ph != h or pw != w:
          output_logits = F.interpolate(output_logits, size=(h, w), mode='bilinear', align_corners=True)


        # compute loss
        loss = sem_criterion(output_logits, labels)
        """
        filler = torch.ones_like(labels) * config.TRAIN.IGNORE_LABEL
        bd_label = torch.where(F.sigmoid(outputs[-1][:,0,:,:])>0.8, labels, filler)
        loss_sb = self.sem_loss(outputs[-2], bd_label)
        loss += loss_sb
        """

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        iou_meter.addCouple(predicted,labels)

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    avg_loss = epoch_loss/iterPerEpoch
    stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TRAINING,epoch=epoch)

    if validate:
        model.eval()
        val_loss = 0
        iterPerVal = len(urban_val_dataloader)
        iou_meter = IoUMeter()
        for j, (inputs, targets) in enumerate(urban_val_dataloader):

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits,_,_ = model(inputs)
            output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

            # compute loss
            loss = sem_criterion(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            iou_meter.addCouple(predicted,labels)

            step_loss = loss.data.item()
            val_loss += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        avg_loss = val_loss/iterPerVal
        mean_IoU = iou_meter.getTotal()

        stat_plot(avg_loss,iou_meter,mode=StatPlotMode.VALIDATION,epoch=epoch)

        if mean_IoU > best_IoU:
            best_IoU = mean_IoU
            save_model_weights(model,weights_path)
        # END OF VALIDATION

    optim_scheduler.step()

### Testing

Now we test using the val dataset:

In [28]:
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
val_loss = 0
iterPerVal = len(urban_val_dataloader)
iou_meter = IoUMeter()

for j, (inputs, targets) in enumerate(urban_val_dataloader):

      # feeds in model
      inputs = inputs.to(DEVICE)
      labels = targets.long().to(device=DEVICE)
      output_logits,_,_ = model(inputs)
      output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

      # compute loss
      loss = sem_criterion(output_logits, labels)

      # compute the training accuracy
      _, predicted = torch.max(output_logits.data, 1)

      iou_meter.addCouple(predicted,labels)

      step_loss = loss.data.item()
      val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS
avg_loss = val_loss/iterPerVal
stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TESTING)


[30m[42m[1mTEST RESULTS on VALIDATION SET: mean Loss = 1.540 | mean-IoU = 0.343[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.322[0m
[37m[2mClass 1 (Building): IoU = 0.405[0m
[37m[2mClass 2 (Road): IoU = 0.370[0m
[37m[2mClass 3 (Water): IoU = 0.520[0m
[37m[2mClass 4 (Barren): IoU = 0.206[0m
[37m[2mClass 5 (Forest): IoU = 0.340[0m
[37m[2mClass 6 (Agricultural): IoU = 0.239[0m


## Domain Shift

Add dataset and loaders for rural images:

In [29]:
# Rural Datasets (train, val, test)
rural_val = LoveDA(VAL_PATH, DataType.RURAL, transforms=test_transform)

# Rural Dataloaders (train, val, test)
NUM_WORKERS = 2 if DBG else 4
rural_val_dataloader = DataLoader(rural_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
weights_path = PIDNET_FINAL_WEIGHTS_PATH

first let's test with domain shift:

In [30]:
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
val_loss = 0
iterPerVal = len(rural_val_dataloader)
iou_meter = IoUMeter()
for j, (inputs, targets) in enumerate(rural_val_dataloader):

      # feeds in model
      inputs = inputs.to(DEVICE)
      labels = targets.long().to(device=DEVICE)
      output_logits,_,_ = model(inputs)
      output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

      # compute loss
      loss = sem_criterion(output_logits, labels)

      # compute the training accuracy
      _, predicted = torch.max(output_logits.data, 1)

      iou_meter.addCouple(predicted,labels)

      step_loss = loss.data.item()
      val_loss += step_loss

# FINAL EPOCH-WISE COMPUTATIONS
avg_loss = val_loss/iterPerVal
stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TESTING)

[30m[42m[1mTEST RESULTS on VALIDATION SET: mean Loss = 1.806 | mean-IoU = 0.267[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.468[0m
[37m[2mClass 1 (Building): IoU = 0.283[0m
[37m[2mClass 2 (Road): IoU = 0.209[0m
[37m[2mClass 3 (Water): IoU = 0.352[0m
[37m[2mClass 4 (Barren): IoU = 0.082[0m
[37m[2mClass 5 (Forest): IoU = 0.139[0m
[37m[2mClass 6 (Agricultural): IoU = 0.333[0m


### Augment
Now let's try adding some data augmentation techniques:

In [31]:
rural_val = LoveDA(VAL_PATH, DataType.RURAL, transforms=test_transform)
rural_val_dataloader = DataLoader(rural_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
NUM_WORKERS = 2 if DBG else 4

In [32]:
aug1 = ALB.Compose([
          ALB.HorizontalFlip(),
          ALB.RandomRotate90(),
          ALB.ColorJitter()
          ],p= 0.5)
aug2 = ALB.Compose([
          ALB.RandomBrightnessContrast(),
          ALB.RandomGamma(),
          ALB.GaussianBlur(),
          ],p= 0.5)
aug3 = ALB.Compose([
          ALB.HorizontalFlip(),
          ALB.RandomRotate90(),
          ALB.ColorJitter(),
          ALB.RandomBrightnessContrast(),
          ALB.RandomGamma(),
          ALB.GaussianBlur(),
          ],p= 0.5)

augmentations = [aug1,aug2,aug3]


# WE DEFINE A LIST OF DIFFERENT AUGMENTATION CHAINS FOR DIFFERENT RUNS
train_transform_augmentations = [
  ALB.Compose([
      ALB.Resize(256,256),
      aug_i,
      ALB.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])
  for aug_i in augmentations
]

NUM_AUGMENTATIONS = len(augmentations) # we are performing 3 augmentations

Now we define which augmentation to appy in train/testing:

In [33]:
TRAIN_INDEXES = {0,1,2} # skip first as it load weights from google
gdown_links = ["https://drive.google.com/uc?id=1FyNDjz0YvxnWvy63Aff4MpxEuanQaKP9","https://drive.google.com/uc?id=1rTDn1dJm1_HzbwlVZL4bSwpxpCpGxzOb","https://drive.google.com/uc?id=1zU7CIOfOj0mKFA-EqN6VGDCWtcK9GmAd"]
weights_paths = [MODELS_FOLDER + f"/pidnet_augmented_v{i+1}.pth" for i in range(NUM_AUGMENTATIONS)]

for i in range(NUM_AUGMENTATIONS):
  if i not in TRAIN_INDEXES:
    gdown.download(gdown_links[i], weights_paths[i], quiet=False)


In [34]:
import warnings
from torch.backends import cudnn

# enable validation during training
validate = True

for k, trf in enumerate(train_transform_augmentations):
    weights_path = weights_paths[k]

    if k in TRAIN_INDEXES:

        model = load_model_weights(model,PIDNET_PRETRAIN_WEIGHTS_PATH).to(DEVICE)

        urban_train_augmented = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=trf) # change transformation in each run
        urban_train_dataloader_augmented = DataLoader(urban_train_augmented, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

        model.train()
        model = model.to(DEVICE) # switch to GPU

        optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)
        optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

        iterPerEpoch = len(urban_train_dataloader_augmented)

        warnings.filterwarnings('ignore')
        cudnn.benchmark
        CUDA_LAUNCH_BLOCKING=1

        best_IoU = 0

        print(Fore.BLACK+Back.RED+Style.BRIGHT+f"Training PIDNet with LoveDA-URBAN with augmentation v{k+1}"+Style.RESET_ALL)

        for epoch in range(NUM_EPOCHS):
            model.train()
            epoch_loss = 0
            iou_meter = IoUMeter()

            for i, (inputs, targets) in enumerate(urban_train_dataloader_augmented):
                optimizer_fn.zero_grad()

                # send inputs to gpu
                inputs = inputs.to(DEVICE)
                labels = targets.long().to(device=DEVICE)

                # feeds in the model
                output_logits,_,_ = model(inputs)

                h, w = labels.size(1), labels.size(2)
                ph, pw = output_logits.size(2), output_logits.size(3)
                if ph != h or pw != w:
                  output_logits = F.interpolate(output_logits, size=(h, w), mode='bilinear', align_corners=True)


                # compute loss
                loss = sem_criterion(output_logits, labels)
                """
                filler = torch.ones_like(labels) * config.TRAIN.IGNORE_LABEL
                bd_label = torch.where(F.sigmoid(outputs[-1][:,0,:,:])>0.8, labels, filler)
                loss_sb = self.sem_loss(outputs[-2], bd_label)
                loss += loss_sb
                """

                # backward loss and optimizer step
                loss.backward()
                optimizer_fn.step()

                #compute the training accuracy
                _, predicted = torch.max(output_logits.data, 1)

                iou_meter.addCouple(predicted,labels)

                step_loss = loss.data.item()
                epoch_loss += step_loss

            # FINAL EPOCH-WISE COMPUTATIONS
            avg_loss = epoch_loss/iterPerEpoch
            stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TRAINING,epoch=epoch)

            if validate:
                model.eval()
                val_loss = 0
                iterPerVal = len(rural_val_dataloader)
                iou_meter = IoUMeter()
                for j, (inputs, targets) in enumerate(rural_val_dataloader):

                    # feeds in model
                    inputs = inputs.to(DEVICE)
                    labels = targets.long().to(device=DEVICE)
                    output_logits,_,_ = model(inputs)
                    output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

                    # compute loss
                    loss = sem_criterion(output_logits, labels)

                    # compute the training accuracy
                    _, predicted = torch.max(output_logits.data, 1)

                    iou_meter.addCouple(predicted,labels)

                    step_loss = loss.data.item()
                    val_loss += step_loss

                # FINAL EPOCH-WISE COMPUTATIONS
                avg_loss = val_loss/iterPerVal
                mean_IoU = iou_meter.getTotal()
                stat_plot(avg_loss,iou_meter,mode=StatPlotMode.VALIDATION,epoch=epoch)

                if mean_IoU > best_IoU:
                    best_IoU = mean_IoU
                    save_model_weights(model,weights_path)
                # END OF VALIDATION

        optim_scheduler.step()
    # END OF TRAINING

    # TESTING ON BEST WEIGHTS
    print(Fore.BLACK+Back.CYAN+Style.BRIGHT+f"Testing PIDNet with LoveDA-URBAN with augmentation v{k+1}"+Style.RESET_ALL)
    model = load_model_weights(model,weights_path).to(DEVICE)
    model.eval()
    val_loss = 0
    iterPerVal = len(rural_val_dataloader)
    iou_meter = IoUMeter()
    for j, (inputs, targets) in enumerate(rural_val_dataloader):

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)
        output_logits,_,_ = model(inputs)
        output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

        # compute loss
        loss = sem_criterion(output_logits, labels)

        # compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        iou_meter.addCouple(predicted,labels)

        step_loss = loss.data.item()
        val_loss += step_loss


    # FINAL EPOCH-WISE COMPUTATIONS
    avg_loss = val_loss/iterPerVal
    stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TESTING)
    # END OF TESTING



[30m[41m[1mTraining PIDNet with LoveDA-URBAN with augmentation v1[0m
[32m[22mTRAINING RESULTS (@epoch=1): mean Loss = 1.395 | mean-IoU = 0.238[0m
[30m[43m[1mVALIDATION RESULTS (@epoch=1): mean Loss = 2.000 | mean-IoU = 0.191[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.429[0m
[37m[2mClass 1 (Building): IoU = 0.203[0m
[37m[2mClass 2 (Road): IoU = 0.117[0m
[37m[2mClass 3 (Water): IoU = 0.365[0m
[37m[2mClass 4 (Barren): IoU = 0.075[0m
[37m[2mClass 5 (Forest): IoU = 0.122[0m
[37m[2mClass 6 (Agricultural): IoU = 0.028[0m
Saving Model to saved_models/pidnet_augmented_v1.pth...
Done!
[32m[22mTRAINING RESULTS (@epoch=2): mean Loss = 1.063 | mean-IoU = 0.313[0m
[30m[43m[1mVALIDATION RESULTS (@epoch=2): mean Loss = 1.913 | mean-IoU = 0.210[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.462[0m
[37m[2mClass 1 (Building): IoU = 0.244[0m
[37m[2mClass 2 (Road): IoU = 0.140[0m
[37m[2mClass 3 (Water): Io

## Domain Adaptation techniques

### DACS: Domain Adaptation via Cross-domain mixed Sampling
Now we try to implement another **UDA** technique that is based on mixing samples from the two domains.

In [35]:
half_classes_num,m = divmod(NUM_CLASSES,2)
half_classes_num += m*np.random.randint(0,2) # in case NUM_CLASSES IS ODD

def DACS_mix_batch(source_batch,source_labels,target_batch,target_pseudo_labels):
  # initialize the mixed items with a clone of the target
  dacs_batch = target_batch.clone()
  dacs_labels = target_pseudo_labels.clone()

  # create a mask with half of the classes from source
  mask_classes = np.random.choice(NUM_CLASSES, half_classes_num, replace=False) # select a subset of classes
  mask = torch.isin(source_labels,mask_classes) # create a mask with them

  # replaces in target parts of the mask with the source
  dacs_batch[mask] = source_batch[mask]
  dacs_labels[mask] = source_labels[mask]
  return dacs_batch, dacs_labels

In [None]:
BEST_TRF_INDEX = 0
best_trf = train_transform_augmentations[BEST_TRF_INDEX]

In [None]:
# Train Datasets (urban and DACS mix of urban and rural with pseudo-labels)
urban_train_augmented = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=best_trf) # best augmentation
rural_train = LoveDA(TRAIN_PATH, DataType.RURAL,transforms=train_transform) # base transformation
urban_train_dataloader_augmented = DataLoader(urban_train_augmented, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

# Train dataloaders (urban and DACS mix of urban and rural with pseudo-labels)

rural_train_dataloader = DataLoader(rural_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)

# Val dataset (only rural)
rural_val = LoveDA(VAL_PATH, DataType.RURAL, transforms=test_transform)

# Rural Dataloaders (train, val, test)
NUM_WORKERS = 2 if DBG else 4
rural_val_dataloader = DataLoader(rural_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
weights_path = PIDNET_DACS_FINAL_WEIGHTS_PATH

In [None]:
import warnings
from torch.backends import cudnn

warnings.filterwarnings('ignore')
cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1

model = load_model_weights(model,PIDNET_PRETRAIN_WEIGHTS_PATH).to(DEVICE)

iterPerEpoch = len(urban_train_dataloader_augmented)


best_IoU = 0

print(Fore.BLACK+Back.RED+Style.BRIGHT+f"Training PIDNet with LoveDA-URBAN with augmentation v{i+1}"+Style.RESET_ALL)

for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    iou_meter = IoUMeter()

    for i, (urban_inputs, urban_targets) in enumerate(urban_train_dataloader_augmented):
        optimizer_fn.zero_grad()

        # send inputs to gpu
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)

        # feeds in the model
        output_logits,_,_ = model(inputs)

        h, w = labels.size(1), labels.size(2)
        ph, pw = output_logits.size(2), output_logits.size(3)
        if ph != h or pw != w:
          output_logits = F.interpolate(output_logits, size=(h, w), mode='bilinear', align_corners=True)


        # compute loss
        loss = sem_criterion(output_logits, labels)
        """
        filler = torch.ones_like(labels) * config.TRAIN.IGNORE_LABEL
        bd_label = torch.where(F.sigmoid(outputs[-1][:,0,:,:])>0.8, labels, filler)
        loss_sb = self.sem_loss(outputs[-2], bd_label)
        loss += loss_sb
        """

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        iou_meter.addCouple(predicted,labels)

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    avg_loss = epoch_loss/iterPerEpoch
    stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TRAINING,epoch=epoch)

    if validate:
        model.eval()
        val_loss = 0
        iterPerVal = len(rural_val_dataloader)
        iou_meter = IoUMeter()
        for j, (inputs, targets) in enumerate(rural_val_dataloader):

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits,_,_ = model(inputs)
            output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

            # compute loss
            loss = sem_criterion(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            iou_meter.addCouple(predicted,labels)

            step_loss = loss.data.item()
            val_loss += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        avg_loss = val_loss/iterPerVal
        mean_IoU = iou_meter.getTotal()
        stat_plot(avg_loss,iou_meter,mode=StatPlotMode.VALIDATION,epoch=epoch)

        if mean_IoU > best_IoU:
            best_IoU = mean_IoU
            save_model_weights(model,weights_path)
        # END OF VALIDATION

optim_scheduler.step()
# END OF TRAINING


testing

In [None]:
# TESTING ON BEST WEIGHTS
print(Fore.BLACK+Back.CYAN+Style.BRIGHT+f"Testing PIDNet with LoveDA-URBAN with augmentation v{i+1}"+Style.RESET_ALL)
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
val_loss = 0
iterPerVal = len(rural_val_dataloader)
iou_meter = IoUMeter()
for j, (inputs, targets) in enumerate(rural_val_dataloader):

    # feeds in model
    inputs = inputs.to(DEVICE)
    labels = targets.long().to(device=DEVICE)
    output_logits,_,_ = model(inputs)
    output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

    # compute loss
    loss = sem_criterion(output_logits, labels)


    # compute the training accuracy
    _, predicted = torch.max(output_logits.data, 1)

    iou_meter.addCouple(predicted,labels)

    step_loss = loss.data.item()
    val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS
avg_loss = val_loss/iterPerVal
stat_plot(avg_loss,iou_meter,mode=StatPlotMode.TESTING)
# END OF TESTING

## 5

In [None]:

import cv2
def canny_with_cv2(images_tensors, low_threshold=0.1, high_threshold=0.2):
    # Convert to NumPy
    edges_tensors = images_tensors.clone().cpu()
    for i,img in enumerate(edges_tensors):
      # Convert to NumPy
      image_np = img.numpy()
      edges_np = cv2.Canny((image_np*255).astype('uint8'), low_threshold, high_threshold)

      # Convert back to tensor
      edges_tensors[i] = torch.from_numpy(edges_np).float() / 255.0
    return edges_tensors