<a href="https://colab.research.google.com/github/aledima00/Project4_SemSeg_AML2024/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 4 - Semantic Segmentation

First let's download dataset, that is already split in "Train", "Test" and "Val" modules

In [2]:
!pip install colorama | tail -n 1
!pip install icecream | tail -n 1
!pip install albumentations | tail -n 1
!pip install yacs | tail -n 1
!pip install fvcore | tail -n 1
!pip install tqdm | tail -n 1



In [3]:
import os
import glob
from skimage.io import imread
import logging
from enum import Enum
import gdown

from icecream import ic

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms

from PIL import Image

import numpy as np

import albumentations

  check_for_updates()


General Configuration:

In [65]:
DBG = False                   # set to True for debug mode (lighter execution + dbg prints)
CONFIG_DATASET = True         # set to True to download and config all dataset resources
CONFIG_DEEPLABV2 = True       # set to True to download and config all DeepLabv2 resources
CONFIG_PIDNET = True          # set to True to download and config all PIDNET resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
TRAIN_PATH = "Train"          # path of the train folder
VAL_PATH = "Val"              # path of the val folder
TEST_PATH = "Test"            # path of the test folder
MODELS_FOLDER = "saved_models"
if not os.path.isdir(MODELS_FOLDER):
    os.makedirs(MODELS_FOLDER)

DEEPLABV2_WEIGHTS_PATH = "deeplabv2-pretrain-weights.pth"  # path of the deeplabv2 folder
PIDNET_WEIGHTS_PATH = "PIDNet/pretrained_models/imagenet/imagenet.pth.tar"

Using device: cuda


download datasets:

In [5]:
def config_generic_dataset(DS_PATHNAME,URL):
  !rm -rf {DS_PATHNAME}
  ZIP_PATH = DS_PATHNAME + ".zip"
  !rm {ZIP_PATH}
  !wget -O {ZIP_PATH} {URL}
  !unzip {ZIP_PATH} | tail -n 3
  !rm {ZIP_PATH}

def config_train_dataset():
  config_generic_dataset(TRAIN_PATH, "https://zenodo.org/records/5706578/files/Train.zip?download=1")
def config_val_dataset():
  config_generic_dataset(VAL_PATH, "https://zenodo.org/records/5706578/files/Val.zip?download=1")
def config_test_dataset():
  config_generic_dataset(TEST_PATH, "https://zenodo.org/records/5706578/files/Test.zip?download=1")

def config_all_dataset(*,force=False):
  print("Dowloading and Configuring Dataset")
  if force or (not os.path.exists(TRAIN_PATH)):
    config_train_dataset()
  if force or (not os.path.exists(VAL_PATH)):
    config_val_dataset()
  if force or (not os.path.exists(TEST_PATH)):
    config_test_dataset()

download and configure deeplabv2 model library (_with R101 backbone_) and the pretrain weights:

In [6]:
def config_deeplabv2_model():
  print("Dowloading and Configuring DeepLabv2 Model")
  import sys
  import gdown
  !rm -rf "MLDL2024_project1"
  !git clone https://github.com/Gabrysse/MLDL2024_project1.git
  sys.path.append("/content/MLDL2024_project1/")
  gdown.download("https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v", DEEPLABV2_WEIGHTS_PATH, quiet=False)


config pidnet...

In [7]:
def config_pidnet():
  import sys
  import gdown
  print("Dowloading and Configuring PIDNET Model")
  !rm -rf "PIDNet"
  !git clone https://github.com/XuJiacong/PIDNet.git
  sys.path.append("/content/PIDNet/")
  gdown.download("https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-", PIDNET_WEIGHTS_PATH, quiet=False)


In [8]:
if CONFIG_DATASET:
  config_all_dataset()
if CONFIG_DEEPLABV2:
  config_deeplabv2_model()
if CONFIG_PIDNET:
  config_pidnet()

Dowloading and Configuring Dataset
rm: cannot remove 'Train.zip': No such file or directory
--2025-01-03 21:43:00--  https://zenodo.org/records/5706578/files/Train.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.48.194, 188.185.43.25, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.48.194|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4021669263 (3.7G) [application/octet-stream]
Saving to: ‘Train.zip’


2025-01-03 21:48:35 (11.5 MB/s) - ‘Train.zip’ saved [4021669263/4021669263]

  inflating: Train/Urban/masks_png/2519.png  
  inflating: Train/Urban/masks_png/2520.png  
  inflating: Train/Urban/masks_png/2521.png  
rm: cannot remove 'Val.zip': No such file or directory
--2025-01-03 21:49:15--  https://zenodo.org/records/5706578/files/Val.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.48.194, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, await

Downloading...
From (original): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v
From (redirected): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v&confirm=t&uuid=face8b86-8c9a-4ebf-9bbc-64456d51932b
To: /content/deeplabv2-pretrain-weights.pth
100%|██████████| 177M/177M [00:05<00:00, 32.8MB/s]

Dowloading and Configuring PIDNET Model





Cloning into 'PIDNet'...
remote: Enumerating objects: 386, done.[K
remote: Counting objects: 100% (193/193), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 386 (delta 131), reused 125 (delta 125), pack-reused 193 (from 1)[K
Receiving objects: 100% (386/386), 212.80 MiB | 15.37 MiB/s, done.
Resolving deltas: 100% (184/184), done.


Downloading...
From: https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-
To: /content/PIDNet/pretrained_models/imagenet/imagenet.pth.tar
100%|██████████| 38.1M/38.1M [00:01<00:00, 28.5MB/s]


In [9]:
from colorama import Fore, Back, Style
def dbgp(name,value):
  """ Debug print function """
  if DBG:
    print(f"{Fore.BLACK}{Back.GREEN}{Style.BRIGHT}{name}:\t{value}{Fore.RESET}{Back.RESET}{Style.RESET_ALL}")

## Data Preparation

Create Dataset class and filter urban pictures...

In [10]:
NUM_CLASSES = 7
BATCH_SIZE = 2 if DBG else 16
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STDDEV = (0.229, 0.224, 0.225)

In [11]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

train_transform = A.Compose([
      A.Resize(256,256),
      A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])

test_transform = A.Compose([
    A.Resize(256,256),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
    ToTensorV2()
])


In [12]:
# taken from official repo of LoveDA
IGNORE_INDEX = -1
COLOR_MAP = {
    IGNORE_INDEX:"IGNORE",
    0:"Background",
    1:"Building",
    2:"Road",
    3:"Water",
    4:"Barren",
    5:"Forest",
    6:"Agricultural"
}
CLASSES = list(key for key in COLOR_MAP.keys() if COLOR_MAP[key] != "IGNORE")

In [13]:
def pil_loader(path,*,format:str="RGB"):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert(format)

class DataType(Enum):
  RURAL = 0
  URBAN = 1

class LoveDA(Dataset):
  def __init__(self, basedir, data_type:DataType, transforms=None):
    #super(LoveDA, self).__init__(basedir, transforms, target_transform) # should we do this??
    if data_type == DataType.RURAL:
        self.base_path = os.path.join(basedir, "Rural")
    else: #data_type == DataType.URBAN:
        self.base_path = os.path.join(basedir, "Urban")


    # list of integers that identifies paths relative to both images_png and masks_png
    self.int_pathrefs = os.listdir(os.path.join(self.base_path, "images_png"))
    self.int_pathrefs = list(int(st.split(".")[0]) for st in self.int_pathrefs)

    # DEBUG PRINT
    if DBG:
      self.int_pathrefs = self.int_pathrefs[:15] # limit the dataset for debug

    self.transforms = transforms

  def __getitem__(self, idx):
    assert idx < len(self), 'Index out of range'
    image_path = os.path.join(self.base_path, "images_png", str(self.int_pathrefs[idx]) + ".png")
    mask_path = os.path.join(self.base_path, "masks_png", str(self.int_pathrefs[idx]) + ".png")
    image = pil_loader(image_path,format="RGB")
    mask = pil_loader(mask_path,format="L")

    # Convert PIL images to numpy arrays
    image = np.array(image)
    mask = np.array(mask, dtype=np.int8)

    if self.transforms is not None:
      augmented = self.transforms(image=image, mask=mask)
      image = augmented["image"]
      mask = augmented["mask"]

    mask -= 1

    return image, mask

  def __len__(self):
    return len(self.int_pathrefs)


In [14]:
# Urban Datasets (train, val, test)
urban_train = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=train_transform)
urban_val = LoveDA(VAL_PATH, DataType.URBAN, transforms=test_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 2 if DBG else 4
urban_train_dataloader = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
urban_val_dataloader = DataLoader(urban_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)



## Utility Functions

flops, parameters and latency are computed for a model using the function `analyze_model`

In [41]:
from fvcore.nn import FlopCountAnalysis, parameter_count
import time
from tqdm.auto import tqdm

import warnings
from torch.backends import cudnn

warnings.filterwarnings('ignore')
cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1


def analyze_model(model,*,iterations=100,batch_size=1):

  model.training=False
  height = 256
  width = 256
  image = torch.zeros((batch_size,3, height, width)).to(DEVICE)

  # Set the model to evaluation mode to avoid issues with batch normalization
  model.eval()

  flops = FlopCountAnalysis(model, image)
  params = parameter_count(model)['']
  print(f"Model FLOPs: {flops.total()}")
  print(f"Model Parameters: {params}")
  #print(f"table:\n{flop_count_table(flops)}")


  # latency compute
  latency = list()
  start_event = torch.cuda.Event(enable_timing=True)
  end_event = torch.cuda.Event(enable_timing=True)
  for _ in tqdm(range(iterations)):
    start_event.record()  # Record start time on GPU
    _ = model(image)      # Run inference
    end_event.record()    # Record end time on GPU

    # Wait for GPU synchronization to ensure accurate timing
    torch.cuda.synchronize()

    # Calculate time elapsed in milliseconds
    latency.append(start_event.elapsed_time(end_event))
  avg_latency = sum(latency) / len(latency)
  print(f"Average latency: {avg_latency:.2f} ms")

the model weights can be saved and reloaded using the following functions:

In [56]:
def save_model_weights(model,model_name):
  save_path = os.path.join(MODELS_FOLDER, model_name)
  print(f"Saving Model {model_name} to {save_path}...")
  torch.save(model.state_dict(), save_path)
  print("Done!")
  return save_path

def load_model_weights(model, weights_path):
    if weights_path is None:
        return model
    weights_dict = torch.load(weights_path, map_location='cpu')
    if 'state_dict' in weights_dict:
        weights_dict = weights_dict['state_dict']
    model_dict = model.state_dict()
    msg = 'Loaded {} parameters!'.format(len(weights_dict))
    model_dict.update(weights_dict)
    model.load_state_dict(model_dict, strict = False)
    return model


## DeepLabv2 on LoveDA (Urban)

In [76]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

Get DeepLabv2 model with pretrain weights:

In [77]:
from MLDL2024_project1.models.deeplabv2 import deeplabv2

model = deeplabv2.get_deeplab_v2(num_classes=NUM_CLASSES,pretrain=True,pretrain_model_path=DEEPLABV2_WEIGHTS_PATH)
model_name = "deeplab_v2"
weights_path = None

Deeplab pretraining loading...


### Optimizer, Loss, ecc.

In [78]:
# enable validation during training
validate = True

model = model.to(DEVICE) # switch to GPU

# Loss (as said in DeepLabv2 docs)
loss_fn = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

# Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

In [79]:
logging.getLogger("fvcore.nn.jit_analysis").setLevel(logging.ERROR)
analyze_model(model,iterations=1000,batch_size=1)

Model FLOPs: 47669164800
Model Parameters: 43016284


  0%|          | 0/1000 [00:00<?, ?it/s]

Average latency: 37.59 ms


### Training Loop

In [80]:
import warnings
from torch.backends import cudnn
warnings.filterwarnings('ignore')

train_iter = 0
iterPerEpoch = len(urban_train_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1

best_IoU = 0

EPSILON_IOU = 1e-7

for epoch in range(NUM_EPOCHS):
    model.train(True)
    epoch_loss = 0

    total_intersection_per_class = {cls: 0 for cls in CLASSES}
    total_union_per_class = {cls: 0 for cls in CLASSES}

    for i, (inputs, targets) in enumerate(urban_train_dataloader):

        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)
        output_logits,_,_ = model(inputs)

        # compute loss
        loss = loss_fn(output_logits, labels)

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        for cls in CLASSES:
            true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
            pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

            # Calcola l'intersezione e l'unione per quella classe
            intersection = torch.logical_and(true_mask, pred_mask).sum().item()
            union = torch.logical_or(true_mask, pred_mask).sum().item()

            # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
            total_intersection_per_class[cls] += intersection
            total_union_per_class[cls] += union

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
    mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
    avg_loss = epoch_loss/iterPerEpoch
    print(Fore.GREEN + Style.NORMAL + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)

    if validate:
        iterPerVal = len(urban_val_dataloader)
        model.eval()
        val_loss = 0
        total_intersection_per_class = {cls: 0 for cls in CLASSES}
        total_union_per_class = {cls: 0 for cls in CLASSES}
        for j, (inputs, targets) in enumerate(urban_val_dataloader):

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits = model(inputs)

            # compute loss
            loss = loss_fn(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            for cls in CLASSES:
                true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                # Calcola l'intersezione e l'unione per quella classe
                intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                union = torch.logical_or(true_mask, pred_mask).sum().item()

                # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                total_intersection_per_class[cls] += intersection
                total_union_per_class[cls] += union

            step_loss = loss.data.item()
            val_loss += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
        mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
        avg_loss = val_loss/iterPerVal
        print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)
        # print IoU for each class
        print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
        for cls in CLASSES:
            print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

        if mean_IoU > best_IoU:
            best_IoU = mean_IoU
            weights_path = save_model_weights(model,model_name)
        # END OF VALIDATION

    optim_scheduler.step()


[32m[22mTrain: Epoch = 1 | mean Loss = 1.363 | mean-IoU = 0.246[0m
[30m[43m[1mVALIDATION RESULTS (@epoch=1): mean Loss = 2.594 | mean-IoU = 0.179[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.251[0m
[37m[2mClass 1 (Building): IoU = 0.299[0m
[37m[2mClass 2 (Road): IoU = 0.356[0m
[37m[2mClass 3 (Water): IoU = 0.319[0m
[37m[2mClass 4 (Barren): IoU = 0.017[0m
[37m[2mClass 5 (Forest): IoU = 0.001[0m
[37m[2mClass 6 (Agricultural): IoU = 0.009[0m
Saving Model deeplab_v2 to saved_models/deeplab_v2...
Done!


KeyboardInterrupt: 

Now we test using the val dataset:

In [81]:
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
iterPerVal = len(urban_val_dataloader)
val_loss = 0
total_intersection_per_class = {cls: 0 for cls in CLASSES}
total_union_per_class = {cls: 0 for cls in CLASSES}
for j, (inputs, targets) in enumerate(urban_val_dataloader):

    # feeds in model
    inputs = inputs.to(DEVICE)
    labels = targets.long().to(device=DEVICE)
    output_logits = model(inputs)

    # compute loss
    loss = loss_fn(output_logits, labels)

    # compute the training accuracy
    _, predicted = torch.max(output_logits.data, 1)

    for cls in CLASSES:
        true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
        pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

        # Calcola l'intersezione e l'unione per quella classe
        intersection = torch.logical_and(true_mask, pred_mask).sum().item()
        union = torch.logical_or(true_mask, pred_mask).sum().item()

        # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
        total_intersection_per_class[cls] += intersection
        total_union_per_class[cls] += union

    step_loss = loss.data.item()
    val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS
class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
avg_loss = val_loss/iterPerVal
print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'TEST RESULTS: mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)
# print IoU for each class
print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
for cls in CLASSES:
    print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

[30m[43m[1mTEST RESULTS: mean Loss = 2.000 | mean-IoU = 2.594[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.251[0m
[37m[2mClass 1 (Building): IoU = 0.299[0m
[37m[2mClass 2 (Road): IoU = 0.356[0m
[37m[2mClass 3 (Water): IoU = 0.319[0m
[37m[2mClass 4 (Barren): IoU = 0.017[0m
[37m[2mClass 5 (Forest): IoU = 0.001[0m
[37m[2mClass 6 (Agricultural): IoU = 0.009[0m


## PIDNet & LoveDA

In [68]:
from PIDNet.models.pidnet import PIDNet
model = PIDNet(m=2, n=3, num_classes=NUM_CLASSES, planes=32, ppm_planes=96, head_planes=128, augment=True)
model = load_model_weights(model,PIDNET_WEIGHTS_PATH)
model_name = "PIDNet"
weights_path = None

### Optimizer, Loss, ecc.

In [69]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

In [70]:
# enable validation during training
validate = True

model.train()
model = model.to(DEVICE) # switch to GPU

# loss functions
sem_criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

# model = FullModel(model, sem_loss=sem_criterion, bd_loss=bd_criterion)


In [71]:
analyze_model(model,iterations=1000,batch_size=1)

Model FLOPs: 1579333632
Model Parameters: 7717839


  0%|          | 0/1000 [00:00<?, ?it/s]

Average latency: 14.52 ms


### Training loop

In [72]:
import warnings

from torch.backends import cudnn
warnings.filterwarnings('ignore')

iterPerEpoch = len(urban_train_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1

best_IoU = 0
EPSILON_IOU = 1e-7

for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    total_intersection_per_class = {cls: 0 for cls in CLASSES}
    total_union_per_class = {cls: 0 for cls in CLASSES}

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        optimizer_fn.zero_grad()

        # send inputs to gpu
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)

        # feeds in the model
        output_logits,_,_ = model(inputs)

        h, w = labels.size(1), labels.size(2)
        ph, pw = output_logits.size(2), output_logits.size(3)
        if ph != h or pw != w:
          output_logits = F.interpolate(output_logits, size=(h, w), mode='bilinear', align_corners=True)


        # compute loss
        loss = sem_criterion(output_logits, labels)
        """
        filler = torch.ones_like(labels) * config.TRAIN.IGNORE_LABEL
        bd_label = torch.where(F.sigmoid(outputs[-1][:,0,:,:])>0.8, labels, filler)
        loss_sb = self.sem_loss(outputs[-2], bd_label)
        loss += loss_sb
        """

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        for cls in CLASSES:
            true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
            pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

            # Calcola l'intersezione e l'unione per quella classe
            intersection = torch.logical_and(true_mask, pred_mask).sum().item()
            union = torch.logical_or(true_mask, pred_mask).sum().item()

            # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
            total_intersection_per_class[cls] += intersection
            total_union_per_class[cls] += union

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
    mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
    avg_loss = epoch_loss/iterPerEpoch
    print(Fore.GREEN + Style.NORMAL + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)

    # Stampa l'IoU per ogni classe
    print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
    for cls in CLASSES:
        print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

    if validate:
        model.eval()
        val_loss = 0
        iterPerVal = len(urban_val_dataloader)
        numCorr = 0
        total_intersection_per_class = {cls: 0 for cls in CLASSES}
        total_union_per_class = {cls: 0 for cls in CLASSES}
        for j, (inputs, targets) in enumerate(urban_val_dataloader):

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits,_,_ = model(inputs)
            output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

            # compute loss
            loss = sem_criterion(output_logits, labels)


            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            for cls in CLASSES:
                true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                # Calcola l'intersezione e l'unione per quella classe
                intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                union = torch.logical_or(true_mask, pred_mask).sum().item()

                # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                total_intersection_per_class[cls] += intersection
                total_union_per_class[cls] += union

            step_loss = loss.data.item()
            val_loss += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
        mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
        avg_loss = val_loss/iterPerVal
        print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)

        if mean_IoU > best_IoU:
            best_IoU = mean_IoU
            weights_path = save_model_weights(model,model_name)
        # END OF VALIDATION

    optim_scheduler.step()

[32m[22mTrain: Epoch = 1 | mean Loss = 1.940 | mean-IoU = 0.203[0m
[36m[22mClass-wise IoUs:[0m
[37m[2mClass 0 (Background): IoU = 0.384[0m
[37m[2mClass 1 (Building): IoU = 0.281[0m
[37m[2mClass 2 (Road): IoU = 0.204[0m
[37m[2mClass 3 (Water): IoU = 0.124[0m
[37m[2mClass 4 (Barren): IoU = 0.179[0m
[37m[2mClass 5 (Forest): IoU = 0.227[0m
[37m[2mClass 6 (Agricultural): IoU = 0.020[0m
[30m[43m[1mVALIDATION RESULTS (@epoch=1): mean Loss = 1.949 | mean-IoU = 0.233[0m
Saving Model PIDNet to saved_models/PIDNet...
Done!


KeyboardInterrupt: 

In [73]:
model = load_model_weights(model,weights_path).to(DEVICE)
model.eval()
val_loss = 0
iterPerVal = len(urban_val_dataloader)
numCorr = 0
total_intersection_per_class = {cls: 0 for cls in CLASSES}
total_union_per_class = {cls: 0 for cls in CLASSES}
for j, (inputs, targets) in enumerate(urban_val_dataloader):

    # feeds in model
    inputs = inputs.to(DEVICE)
    labels = targets.long().to(device=DEVICE)
    output_logits,_,_ = model(inputs)
    output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

    # compute loss
    loss = sem_criterion(output_logits, labels)


    # compute the training accuracy
    _, predicted = torch.max(output_logits.data, 1)

    for cls in CLASSES:
        true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
        pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

        # Calcola l'intersezione e l'unione per quella classe
        intersection = torch.logical_and(true_mask, pred_mask).sum().item()
        union = torch.logical_or(true_mask, pred_mask).sum().item()

        # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
        total_intersection_per_class[cls] += intersection
        total_union_per_class[cls] += union

    step_loss = loss.data.item()
    val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS
class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
avg_loss = val_loss/iterPerVal
print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)

if mean_IoU > best_IoU:
    best_IoU = mean_IoU
    save_path = os.path.join(MODELS_FOLDER, model_name)
    print(f"New Best Model! Saving it to {save_path}")
    torch.save(model.state_dict(), save_path)
# END OF VALIDATION

[30m[43m[1mVALIDATION RESULTS (@epoch=2): mean Loss = 1.949 | mean-IoU = 0.233[0m


## Domain Shift

Add dataset and loaders for rural images:

In [None]:
# Rural Datasets (train, val, test)
rural_val = LoveDA(VAL_PATH, DataType.RURAL, transforms=test_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 2 if DBG else 4
rural_val_dataloader = DataLoader(rural_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

first let's test with domain shift:

In [None]:
model.eval()
val_loss = 0
iterPerVal = len(rural_val_dataloader)
numCorr = 0
total_intersection_per_class = {cls: 0 for cls in CLASSES}
total_union_per_class = {cls: 0 for cls in CLASSES}
for j, (inputs, targets) in enumerate(rural_val_dataloader):

    # feeds in model
    inputs = inputs.to(DEVICE)
    labels = targets.long().to(device=DEVICE)
    output_logits,_,_ = model(inputs)
    output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

    # compute loss
    loss = sem_criterion(output_logits, labels)


    # compute the training accuracy
    _, predicted = torch.max(output_logits.data, 1)

    for cls in CLASSES:
        true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
        pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

        # Calcola l'intersezione e l'unione per quella classe
        intersection = torch.logical_and(true_mask, pred_mask).sum().item()
        union = torch.logical_or(true_mask, pred_mask).sum().item()

        # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
        total_intersection_per_class[cls] += intersection
        total_union_per_class[cls] += union

    step_loss = loss.data.item()
    val_loss += step_loss


# FINAL EPOCH-WISE COMPUTATIONS
class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
avg_loss = val_loss/iterPerVal
print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)

if mean_IoU > best_IoU:
    best_IoU = mean_IoU
    save_path = os.path.join(MODELS_FOLDER, model_name)
    print(f"New Best Model! Saving it to {save_path}")
    torch.save(model.state_dict(), save_path)
# END OF VALIDATION

In [None]:
aug1 = A.Compose([
          A.HorizontalFlip(),
          A.RandomRotate90(),
          A.ColorJitter()
          ],p= 0.5)
aug2 = A.Compose([
          A.RandomBrightnessContrast(),
          A.RandomGamma(),
          A.GaussianBlur(),
          ],p= 0.5)
aug3 = A.Compose([
          A.HorizontalFlip(),
          A.RandomRotate90(),
          A.ColorJitter(),
          A.RandomBrightnessContrast(),
          A.RandomGamma(),
          A.GaussianBlur(),
          ],p= 0.5)

augmentations = [aug1,aug2,aug3]


train_transform_augmentations =
[
  A.Compose([
      A.Resize(256,256),
      aug_i,
      A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])
  for aug_i in augmentations
]


In [None]:
for i, trf in enumerate(train_transform_augmentations):

    # Urban Datasets (train, val, test)
    urban_train_augmented = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=trf)
    rural_val = LoveDA(VAL_PATH, DataType.RURAL, transforms=test_transform)

    # Urban Dataloaders (train, val, test)

    NUM_WORKERS = 2 if DBG else 4
    urban_train_dataloader_augmented = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
    urban_val_dataloader = DataLoader(rural_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)

    import warnings

    from torch.backends import cudnn
    warnings.filterwarnings('ignore')

    iterPerEpoch = len(urban_train_dataloader)

    cudnn.benchmark
    CUDA_LAUNCH_BLOCKING=1

    best_IoU = 0
    EPSILON_IOU = 1e-7

    for epoch in range(NUM_EPOCHS):
        model.train()
        epoch_loss = 0
        total_intersection_per_class = {cls: 0 for cls in CLASSES}
        total_union_per_class = {cls: 0 for cls in CLASSES}

        for i, (inputs, targets) in enumerate(urban_train_dataloader):
            optimizer_fn.zero_grad()

            # send inputs to gpu
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)

            # feeds in the model
            output_logits,_,_ = model(inputs)

            h, w = labels.size(1), labels.size(2)
            ph, pw = output_logits.size(2), output_logits.size(3)
            if ph != h or pw != w:
              output_logits = F.interpolate(output_logits, size=(h, w), mode='bilinear', align_corners=True)


            # compute loss
            loss = sem_criterion(output_logits, labels)
            """
            filler = torch.ones_like(labels) * config.TRAIN.IGNORE_LABEL
            bd_label = torch.where(F.sigmoid(outputs[-1][:,0,:,:])>0.8, labels, filler)
            loss_sb = self.sem_loss(outputs[-2], bd_label)
            loss += loss_sb
            """

            # backward loss and optimizer step
            loss.backward()
            optimizer_fn.step()

            #compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            for cls in CLASSES:
                true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                # Calcola l'intersezione e l'unione per quella classe
                intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                union = torch.logical_or(true_mask, pred_mask).sum().item()

                # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                total_intersection_per_class[cls] += intersection
                total_union_per_class[cls] += union

            step_loss = loss.data.item()
            epoch_loss += step_loss

        # FINAL EPOCH-WISE COMPUTATIONS
        class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
        mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
        avg_loss = epoch_loss/iterPerEpoch
        print(Fore.GREEN + Style.NORMAL + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)

        # Stampa l'IoU per ogni classe
        print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
        for cls in CLASSES:
            print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

        if validate:
            model.eval()
            val_loss = 0
            iterPerVal = len(rural_val_dataloader)
            total_intersection_per_class = {cls: 0 for cls in CLASSES}
            total_union_per_class = {cls: 0 for cls in CLASSES}
            for j, (inputs, targets) in enumerate(rural_val_dataloader):

                # feeds in model
                inputs = inputs.to(DEVICE)
                labels = targets.long().to(device=DEVICE)
                output_logits,_,_ = model(inputs)
                output_logits = F.interpolate(output_logits, size=labels.shape[1:], mode='bilinear', align_corners=True)

                # compute loss
                loss = sem_criterion(output_logits, labels)


                # compute the training accuracy
                _, predicted = torch.max(output_logits.data, 1)

                for cls in CLASSES:
                    true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                    pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                    # Calcola l'intersezione e l'unione per quella classe
                    intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                    union = torch.logical_or(true_mask, pred_mask).sum().item()

                    # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                    total_intersection_per_class[cls] += intersection
                    total_union_per_class[cls] += union

                step_loss = loss.data.item()
                val_loss += step_loss


            # FINAL EPOCH-WISE COMPUTATIONS
            class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
            mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
            avg_loss = val_loss/iterPerVal
            print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)

            if mean_IoU > best_IoU:
                best_IoU = mean_IoU
                save_path = os.path.join(MODELS_FOLDER, model_name)
                print(f"New Best Model! Saving it to {save_path}")
                torch.save(model.state_dict(), save_path)
            # END OF VALIDATION

        optim_scheduler.step()


## 5

In [None]:

import cv2
def canny_with_cv2(images_tensors, low_threshold=0.1, high_threshold=0.2):
    # Convert to NumPy
    edges_tensors = images_tensors.clone().cpu()
    for i,img in enumerate(edges_tensors):
      # Convert to NumPy
      image_np = img.numpy()
      edges_np = cv2.Canny((image_np*255).astype('uint8'), low_threshold, high_threshold)

      # Convert back to tensor
      edges_tensors[i] = torch.from_numpy(edges_np).float() / 255.0
    return edges_tensors