<a href="https://colab.research.google.com/github/aledima00/Project4_SemSeg_AML2024/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 4 - Semantic Segmentation

First let's download dataset, that is already split in "Train", "Test" and "Val" modules

In [1]:
!pip install colorama
!pip install icecream
!pip install albumentations
!pip install yacs



In [1]:
import os
import glob
from skimage.io import imread
import logging
from enum import Enum
import gdown

from icecream import ic

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms

from PIL import Image

import numpy as np

import albumentations

  check_for_updates()


General Configuration:

In [2]:
DBG = False                   # set to True for debug mode (lighter execution + dbg prints)
CONFIG_DATASET = True         # set to True to download and config all dataset resources
CONFIG_DEEPLABV2 = True       # set to True to download and config all DeepLabv2 resources
CONFIG_PIDNET = True          # set to True to download and config all PIDNET resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
TRAIN_PATH = "Train"          # path of the train folder
VAL_PATH = "Val"              # path of the val folder
TEST_PATH = "Test"            # path of the test folder

DEEPLABV2_WEIGHTS_PATH = "deeplabv2-pretrain-weights.pth"  # path of the deeplabv2 folder
PIDNET_WEIGHTS_PATH = "PIDNet/pretrained_models/imagenet/imagenet.pth.tar"

Using device: cuda


download datasets:

In [3]:
def config_generic_dataset(DS_PATHNAME,URL):
  !rm -rf {DS_PATHNAME}
  ZIP_PATH = DS_PATHNAME + ".zip"
  !rm {ZIP_PATH}
  !wget -O {ZIP_PATH} {URL}
  !unzip {ZIP_PATH} | tail -n 3
  !rm {ZIP_PATH}

def config_train_dataset():
  config_generic_dataset(TRAIN_PATH, "https://zenodo.org/records/5706578/files/Train.zip?download=1")
def config_val_dataset():
  config_generic_dataset(VAL_PATH, "https://zenodo.org/records/5706578/files/Val.zip?download=1")
def config_test_dataset():
  config_generic_dataset(TEST_PATH, "https://zenodo.org/records/5706578/files/Test.zip?download=1")

def config_all_dataset(*,force=False):
  print("Dowloading and Configuring Dataset")
  if force or (not os.path.exists(TRAIN_PATH)):
    config_train_dataset()
  if force or (not os.path.exists(VAL_PATH)):
    config_val_dataset()
  if force or (not os.path.exists(TEST_PATH)):
    config_test_dataset()

download and configure deeplabv2 model library (_with R101 backbone_) and the pretrain weights:

In [4]:
def config_deeplabv2_model():
  print("Dowloading and Configuring DeepLabv2 Model")
  import sys
  import gdown
  !rm -rf "MLDL2024_project1"
  !git clone https://github.com/Gabrysse/MLDL2024_project1.git
  sys.path.append("/content/MLDL2024_project1/")
  gdown.download("https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v", DEEPLABV2_WEIGHTS_PATH, quiet=False)


config pidnet...

In [5]:
def config_pidnet():
  import sys
  import gdown
  print("Dowloading and Configuring PIDNET Model")
  !rm -rf "PIDNet"
  !git clone https://github.com/XuJiacong/PIDNet.git
  sys.path.append("/content/PIDNet/")
  gdown.download("https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-", PIDNET_WEIGHTS_PATH, quiet=False)


In [6]:
if CONFIG_DATASET:
  config_all_dataset()
if CONFIG_DEEPLABV2:
  config_deeplabv2_model()
if CONFIG_PIDNET:
  config_pidnet()

Dowloading and Configuring Dataset
Dowloading and Configuring DeepLabv2 Model
Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 34 (delta 8), reused 4 (delta 4), pack-reused 16 (from 1)[K
Receiving objects: 100% (34/34), 12.06 KiB | 12.06 MiB/s, done.
Resolving deltas: 100% (8/8), done.


Downloading...
From (original): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v
From (redirected): https://drive.google.com/uc?id=1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v&confirm=t&uuid=3423e5db-f2b7-455b-90d6-217390178487
To: /content/deeplabv2-pretrain-weights.pth
100%|██████████| 177M/177M [00:01<00:00, 110MB/s]


Dowloading and Configuring PIDNET Model
Cloning into 'PIDNet'...
remote: Enumerating objects: 386, done.[K
remote: Counting objects: 100% (193/193), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 386 (delta 131), reused 125 (delta 125), pack-reused 193 (from 1)[K
Receiving objects: 100% (386/386), 212.80 MiB | 28.60 MiB/s, done.
Resolving deltas: 100% (184/184), done.


Downloading...
From: https://drive.google.com/uc?id=1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-
To: /content/PIDNet/pretrained_models/imagenet/imagenet.pth.tar
100%|██████████| 38.1M/38.1M [00:00<00:00, 45.3MB/s]


In [7]:
from colorama import Fore, Back, Style
def dbgp(name,value):
  """ Debug print function """
  if DBG:
    print(f"{Fore.BLACK}{Back.GREEN}{Style.BRIGHT}{name}:\t{value}{Fore.RESET}{Back.RESET}{Style.RESET_ALL}")

## Data Preparation

Create Dataset class and filter urban pictures...

In [8]:
NUM_CLASSES = 7
BATCH_SIZE = 2 if DBG else 16
AUGMENT_DATA = True
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STDDEV = (0.229, 0.224, 0.225)

In [9]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

if AUGMENT_DATA:
  train_transform = A.Compose([
      A.Resize(256,256),
      A.HorizontalFlip(p=0.5),
      A.RandomRotate90(p=0.5),
      A.ColorJitter(p=0.4),
      A.RandomBrightnessContrast(p=0.2),
      A.RandomGamma(p=0.2),
      A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])
else:
  train_transform = A.Compose([
      A.Resize(256,256),
      A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
      ToTensorV2()
  ])

test_transform = A.Compose([
    A.Resize(256,256),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STDDEV),
    ToTensorV2()
])


In [10]:
# taken from official repo of LoveDA
IGNORE_INDEX = -1
COLOR_MAP = {
    IGNORE_INDEX:"IGNORE",
    0:"Background",
    1:"Building",
    2:"Road",
    3:"Water",
    4:"Barren",
    5:"Forest",
    6:"Agricultural"
}
CLASSES = list(key for key in COLOR_MAP.keys() if COLOR_MAP[key] != "IGNORE")

In [11]:
def pil_loader(path,*,format:str="RGB"):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert(format)

class DataType(Enum):
  RURAL = 0
  URBAN = 1

class LoveDA(Dataset):
  def __init__(self, basedir, data_type:DataType, transforms=None):
    #super(LoveDA, self).__init__(basedir, transforms, target_transform) # should we do this??
    if data_type == DataType.RURAL:
        self.base_path = os.path.join(basedir, "Rural")
    else: #data_type == DataType.URBAN:
        self.base_path = os.path.join(basedir, "Urban")


    # list of integers that identifies paths relative to both images_png and masks_png
    self.int_pathrefs = os.listdir(os.path.join(self.base_path, "images_png"))
    self.int_pathrefs = list(int(st.split(".")[0]) for st in self.int_pathrefs)

    # DEBUG PRINT
    if DBG:
      self.int_pathrefs = self.int_pathrefs[:15] # limit the dataset for debug

    self.transforms = transforms

  def __getitem__(self, idx):
    assert idx < len(self), 'Index out of range'
    image_path = os.path.join(self.base_path, "images_png", str(self.int_pathrefs[idx]) + ".png")
    mask_path = os.path.join(self.base_path, "masks_png", str(self.int_pathrefs[idx]) + ".png")
    image = pil_loader(image_path,format="RGB")
    mask = pil_loader(mask_path,format="L")

    # Convert PIL images to numpy arrays
    image = np.array(image)
    mask = np.array(mask, dtype=np.int8)

    if self.transforms is not None:
      augmented = self.transforms(image=image, mask=mask)
      image = augmented["image"]
      mask = augmented["mask"]

    mask -= 1

    return image, mask

  def __len__(self):
    return len(self.int_pathrefs)


In [12]:
# Urban Datasets (train, val, test)
urban_train = LoveDA(TRAIN_PATH, DataType.URBAN, transforms=train_transform)
urban_val = LoveDA(VAL_PATH, DataType.URBAN, transforms=test_transform)
urban_test = LoveDA(TEST_PATH, DataType.URBAN, transforms=test_transform)

# Urban Dataloaders (train, val, test)

NUM_WORKERS = 2 if DBG else 4
urban_train_dataloader = DataLoader(urban_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
urban_val_dataloader = DataLoader(urban_val, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)
urban_test_dataloader = DataLoader(urban_test, shuffle=False, num_workers=NUM_WORKERS, drop_last=False)



## DeepLabv2 on LoveDA (Urban)

In [14]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

Get DeepLabv2 model with pretrain weights:

In [15]:
from MLDL2024_project1.models.deeplabv2 import deeplabv2

model = deeplabv2.get_deeplab_v2(num_classes=NUM_CLASSES,pretrain=True,pretrain_model_path=DEEPLABV2_WEIGHTS_PATH)

Deeplab pretraining loading...


  saved_state_dict = torch.load(pretrain_model_path)


### Optimizer, Loss, ecc.

In [17]:
# enable validation during training
validate = True

model.train(True)

"""
model.multi_level = False # ask in class
for params in model.get_1x_lr_params_no_scale():
  params.requires_grad = True # no training in Backbone
for params in model.get_10x_lr_params():
  params.requires_grad = True # training in classifiers
"""

model = model.to(DEVICE) # switch to GPU

#Loss (as said in DeepLabv2 docs)
loss_fn = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)


### Training Loop

In [18]:
import warnings
from torch.backends import cudnn
warnings.filterwarnings('ignore')
train_iter = 0
val_iter = 0

trainSamples = len(urban_train) - (len(urban_train) % BATCH_SIZE)
val_samples = len(urban_val)
iterPerEpoch = len(urban_train_dataloader)
val_steps = len(urban_val_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1
model_checkpoint = "model" #name

EPSILON_IOU = 1e-7

for epoch in range(NUM_EPOCHS):
    model.train(True)
    epoch_loss = 0

    total_intersection_per_class = {cls: 0 for cls in CLASSES}
    total_union_per_class = {cls: 0 for cls in CLASSES}

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        #train_iter += 1
        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)
        output_logits,_,_ = model(inputs)

        # compute loss
        loss = loss_fn(output_logits, labels)

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        for cls in CLASSES:
            true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
            pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

            # Calcola l'intersezione e l'unione per quella classe
            intersection = torch.logical_and(true_mask, pred_mask).sum().item()
            union = torch.logical_or(true_mask, pred_mask).sum().item()

            # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
            total_intersection_per_class[cls] += intersection
            total_union_per_class[cls] += union

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
    mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
    avg_loss = epoch_loss/iterPerEpoch
    print(Fore.GREEN + Style.NORMAL + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)

    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        total_intersection_per_class = {cls: 0 for cls in CLASSES}
        total_union_per_class = {cls: 0 for cls in CLASSES}
        for j, (inputs, targets) in enumerate(urban_val_dataloader):
            val_iter += 1

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits = model(inputs)

            # compute loss
            loss = loss_fn(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            for cls in CLASSES:
                true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                # Calcola l'intersezione e l'unione per quella classe
                intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                union = torch.logical_or(true_mask, pred_mask).sum().item()

                # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                total_intersection_per_class[cls] += intersection
                total_union_per_class[cls] += union

            step_loss = loss.data.item()
            val_loss_epoch += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
        mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
        avg_loss = epoch_loss/iterPerEpoch
        print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)
        # Stampa l'IoU per ogni classe
        print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
        for cls in CLASSES:
            print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

    optim_scheduler.step()


RuntimeError: input and target batch or spatial sizes don't match: target [16, 256, 256], input [16, 7, 32, 32]

## PIDNet & LoveDA

In [13]:
### copied from repo ###

def load_pretrained(model, pretrained):
    pretrained_dict = torch.load(pretrained, map_location='cpu')
    if 'state_dict' in pretrained_dict:
        pretrained_dict = pretrained_dict['state_dict']
    model_dict = model.state_dict()
    #pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() if (k[6:] in model_dict and v.shape == model_dict[k[6:]].shape)}
    msg = 'Loaded {} parameters!'.format(len(pretrained_dict))
    print('Attention!!!')
    print(msg)
    print('Over!!!')
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict, strict = False)

    return model
########################

In [14]:
from PIDNet.models.pidnet import PIDNet
model = PIDNet(m=2, n=3, num_classes=NUM_CLASSES, planes=32, ppm_planes=96, head_planes=128, augment=True)
model = load_pretrained(model, PIDNET_WEIGHTS_PATH)

Attention!!!
Loaded 322 parameters!
Over!!!


  pretrained_dict = torch.load(pretrained, map_location='cpu')


### Optimizer, Loss, ecc.

In [19]:
LR = 2e-4               # The initial Learning Rate
MOMENTUM = 0.9          # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-4     # Regularization, you can keep this at the default
NUM_EPOCHS = 20         # Total number of training epochs (iterations over dataset)
STEP_SIZE = [10, 15]    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1             # Multiplicative factor for learning rate step-down

In [20]:
from PIDNet.utils.utils import FullModel
from PIDNet.utils.criterion import CrossEntropy, BondaryLoss


USE_OHEM = False

# enable validation during training
validate = True

model.train()
model = model.to(DEVICE) # switch to GPU

# loss functions
#sem_criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)
sem_criterion = CrossEntropy(ignore_label=IGNORE_INDEX)
bd_criterion = BondaryLoss()

#Opt
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_fn = optim.Adam(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY, eps=1e-4)

#Scheduler
optim_scheduler = optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=STEP_SIZE, gamma=GAMMA)

model = FullModel(model, sem_loss=sem_criterion, bd_loss=bd_criterion)


### Training loop

In [21]:
"""
import cv2
def canny_with_opencv(image_tensor, low_threshold=0.1, high_threshold=0.2):
    # Convert to NumPy
    image_np = image_tensor.squeeze().cpu().numpy()
    edges_np = cv2.Canny((image_np * 255).astype('uint8'), low_threshold, high_threshold)

    # Convert back to tensor
    edges_tensor = torch.from_numpy(edges_np).unsqueeze(0).unsqueeze(0).float().to(device=DEVICE) / 255.0
    return edges_tensor

"""

"\nimport cv2\ndef canny_with_opencv(image_tensor, low_threshold=0.1, high_threshold=0.2):\n    # Convert to NumPy\n    image_np = image_tensor.squeeze().cpu().numpy()\n    edges_np = cv2.Canny((image_np * 255).astype('uint8'), low_threshold, high_threshold)\n\n    # Convert back to tensor\n    edges_tensor = torch.from_numpy(edges_np).unsqueeze(0).unsqueeze(0).float().to(device=DEVICE) / 255.0\n    return edges_tensor\n\n"

In [22]:
import cv2
def canny_with_cv2(images_tensors, low_threshold=0.1, high_threshold=0.2):
    # Convert to NumPy
    edges_tensors = images_tensors.clone().cpu()
    for i,img in enumerate(edges_tensors):
      # Convert to NumPy
      image_np = img.numpy()
      edges_np = cv2.Canny((image_np*255).astype('uint8'), low_threshold, high_threshold)

      # Convert back to tensor
      edges_tensors[i] = torch.from_numpy(edges_np).float() / 255.0
    return edges_tensors

In [None]:
import warnings
from torch.backends import cudnn
warnings.filterwarnings('ignore')
train_iter = 0
val_iter = 0

trainSamples = len(urban_train) - (len(urban_train) % BATCH_SIZE)
val_samples = len(urban_val)
iterPerEpoch = len(urban_train_dataloader)
val_steps = len(urban_val_dataloader)

cudnn.benchmark
CUDA_LAUNCH_BLOCKING=1
model_checkpoint = "model" #name

EPSILON_IOU = 1e-7



for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    total_intersection_per_class = {cls: 0 for cls in CLASSES}
    total_union_per_class = {cls: 0 for cls in CLASSES}

    for i, (inputs, targets) in enumerate(urban_train_dataloader):
        #train_iter += 1
        optimizer_fn.zero_grad()

        # feeds in model
        inputs = inputs.to(DEVICE)
        labels = targets.long().to(device=DEVICE)
        edges = canny_with_cv2(labels).to(device=DEVICE,dtype=torch.float32)
        print(inputs.size())
        print(labels.size())
        print(edges.size())

        # feeds in the model
        losses, _, acc, loss_list = model(inputs, labels, edges)
        print("DONE")
        loss = losses.mean()
        acc  = acc.mean()
        print(f"Loss: {loss.item()}, Acc: {acc.item()}")

        # backward loss and optimizer step
        loss.backward()
        optimizer_fn.step()

        #compute the training accuracy
        _, predicted = torch.max(output_logits.data, 1)

        for cls in CLASSES:
            true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
            pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

            # Calcola l'intersezione e l'unione per quella classe
            intersection = torch.logical_and(true_mask, pred_mask).sum().item()
            union = torch.logical_or(true_mask, pred_mask).sum().item()

            # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
            total_intersection_per_class[cls] += intersection
            total_union_per_class[cls] += union

        step_loss = loss.data.item()
        epoch_loss += step_loss

    # FINAL EPOCH-WISE COMPUTATIONS
    class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
    mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
    avg_loss = epoch_loss/iterPerEpoch
    print(Fore.GREEN + Style.NORMAL + 'Train: Epoch = {} | mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch + 1, avg_loss, mean_IoU)+Style.RESET_ALL)

    # Stampa l'IoU per ogni classe
    print(Fore.CYAN + Style.NORMAL + "Class-wise IoUs:"+ Style.RESET_ALL)
    for cls in CLASSES:
        print(Fore.WHITE + Style.DIM + f"Class {cls} ({COLOR_MAP[cls]}): IoU = {class_IoUs[cls]:.3f}"+ Style.RESET_ALL)

    if validate:
        model.train(False)
        val_loss_epoch = 0
        numCorr = 0
        total_intersection_per_class = {cls: 0 for cls in CLASSES}
        total_union_per_class = {cls: 0 for cls in CLASSES}
        for j, (inputs, targets) in enumerate(urban_val_dataloader):
            val_iter += 1

            # feeds in model
            inputs = inputs.to(DEVICE)
            labels = targets.long().to(device=DEVICE)
            output_logits = model(inputs)

            # compute loss
            loss = loss_fn(output_logits, labels)

            # compute the training accuracy
            _, predicted = torch.max(output_logits.data, 1)

            for cls in CLASSES:
                true_mask = (labels == cls)  # Crea una maschera booleana per la classe `cls` nel target
                pred_mask = (predicted == cls)  # Crea una maschera booleana per la classe `cls` nelle predizioni

                # Calcola l'intersezione e l'unione per quella classe
                intersection = torch.logical_and(true_mask, pred_mask).sum().item()
                union = torch.logical_or(true_mask, pred_mask).sum().item()

                # Aggiungi i valori all'array totale di intersezione e unione per ogni classe
                total_intersection_per_class[cls] += intersection
                total_union_per_class[cls] += union

            step_loss = loss.data.item()
            val_loss_epoch += step_loss


        # FINAL EPOCH-WISE COMPUTATIONS
        class_IoUs = { cls: total_intersection_per_class[cls] / (total_union_per_class[cls] + EPSILON_IOU) for cls in CLASSES }
        mean_IoU = sum(class_IoUs.values()) / NUM_CLASSES
        avg_loss = epoch_loss/iterPerEpoch
        print(Fore.BLACK + Back.YELLOW + Style.BRIGHT + 'VALIDATION RESULTS (@epoch={}): mean Loss = {:.3f} | mean-IoU = {:.3f}'.format(epoch+1, avg_loss, mean_IoU)+Style.RESET_ALL)

    optim_scheduler.step()


torch.Size([16, 3, 256, 256])
torch.Size([16, 256, 256])
torch.Size([16, 256, 256])
