## Import repository

In [15]:
!git clone https://github.com/federico2879/MLDL2024_semantic_segmentation.git

fatal: destination path 'MLDL2024_semantic_segmentation' already exists and is not an empty directory.


## Import package

In [16]:
!pip install -U fvcore

[0m

In [17]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
#from MLDL2024_semantic_segmentation.datasets.cityscapes import CityScapes
#from MLDL2024_semantic_segmentation.models.bisenet.build_contextpath import *
from MLDL2024_semantic_segmentation.models.bisenet.build_bisenet import *
from MLDL2024_semantic_segmentation.train import *
from MLDL2024_semantic_segmentation.utils import *
from MLDL2024_semantic_segmentation.models.metrics import *
#from MLDL2024_semantic_segmentation.models.IOU import * 

## Setup

In [18]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Setup fixed parameters
num_classes = 19
num_epochs = 1

cuda


In [19]:
from torch.utils.data import Dataset
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
import torch

class CityScapes(Dataset):
    def __init__(self, root_dir, split = 'train', transform=None, label_transform=None):
        super(CityScapes, self).__init__()
        self.root_dir = root_dir
        self.image_dir = os.path.join(root_dir, 'images', split)
        self.label_dir = os.path.join(root_dir, 'gtFine', split)
        self.transform = transform
        self.label_transform = label_transform
        self.images = os.listdir(self.image_dir)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        label_name = img_name.replace('leftImg8bit', 'gtFine_labelTrainIds')
        label_path = os.path.join(self.label_dir, label_name)
        
        image = Image.open(img_path).convert('RGB')
        label = Image.open(label_path)
        #label = torch.cat([label] * 3, dim=0)

        if self.transform is not None:
            image = self.transform(image)

        if self.label_transform is not None:
            label = self.label_transform(label)

        label_array = np.array(label) / 255.0
        class_indices = (label_array * 19).astype(np.int)
        label = torch.tensor(class_indices)

        return image, label

    def __len__(self):
        return len(self.images)

## Dataset

In [20]:
# Transformations
transform_image = transforms.Compose([
    transforms.Resize((1024, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
transform_target = transforms.Compose([
    transforms.Resize((1024, 512))
])

In [21]:
# Create dataloader
train_dataset = CityScapes('/kaggle/input/cityscapes/Cityscapes/Cityspaces', 
                           split = 'train', transform = transform_image, 
                           label_transform = transform_target)
dataloader_train = DataLoader(train_dataset, batch_size=4, shuffle=True)

val_dataset = CityScapes('/kaggle/input/cityscapes/Cityscapes/Cityspaces', 
                         split = 'val', transform = transform_image, 
                         label_transform = transform_target)
dataloader_val = DataLoader(val_dataset, batch_size=4, shuffle=False)

## Network, Loss, Optimizer

In [22]:
# Inizialization of the model
model = BiSeNet(num_classes=num_classes, context_path="resnet18").to(device)

#Putting on the 2 gpus
model = torch.nn.DataParallel(model, device_ids = [0,1]).to(device)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=2.5e-2,
                            momentum=0.9,weight_decay=1e-4)

In [None]:
import numpy as np

def fast_hist(pred, target, num_classes):
    k = (pred >= 0) & (pred < num_classes)
    return np.bincount(num_classes * pred[k].astype(int) + target[k], minlength = num_classes**2).reshape(num_classes, num_classes)

def per_class_iou(hist):
    epsilon = 1e-5
    return (np.diag(hist)) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + epsilon)

def meanIOU(num_classes, pred, target):
  mIOU = 0
  IOU_classes = np.zeros([1,num_classes])  
  for i in range(len(pred)):    
      hist = fast_hist(pred[i].cpu().numpy(), target[i].cpu().numpy(), num_classes)
      IOU = per_class_iou(hist)
      print(IOU)
      IOU_classes = IOU_classes + IOU
      print(IOU_classes)
      mIOU = mIOU + sum(IOU)/num_classes 
  return mIOU, IOU_classes 

In [None]:
import torch
import torchvision
import gc
import numpy as np
from MLDL2024_semantic_segmentation.models.IOU import meanIOU
#from MLDL2024_semantic_segmentation.models.metrics import fast_hist
#from MLDL2024_semantic_segmentation.models.metrics import per_class_iou


# Function to clear GPU memory
def clear_gpu_memory():
    torch.cuda.empty_cache()
    gc.collect()

def train(model, optimizer, train_loader, loss_fn, num_classes, clear_memory_every):
    model.train()
    running_loss = 0.0
    correct = 0
    total_iou = 0
    total_iou_cl = np.zeros([1,num_classes])
    total_images = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):

        inputs, targets = inputs.cuda(), targets.cuda()

        # Compute prediction and loss
        outputs =  model(inputs)
       
        #Ridimensioning tensor
        #print(targets)
        targets = targets.squeeze(dim=1)
        #print(f'targets size: {targets.size()}')

        targets = targets.long()

        loss = loss_fn(outputs[0], targets)

        # Backpropagation
        optimizer.zero_grad() # reset gradients of parameters
        loss.backward()  # backpropagate the prediction loss
        optimizer.step() # update model

        #running_loss += loss.item()
        _, predicted = outputs[0].max(1)
        iou, iou_cl = meanIOU(num_classes, predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
        total_iou += iou
        total_iou_cl += iou_cl
        total_images += len(targets)
        print(total_iou)
        print(total_iou_cl)
        print('######')
        
        # Clear GPU memory periodically
        if clear_memory_every!=0 and batch_idx % clear_memory_every == 0:
            clear_gpu_memory()

    miou = total_iou/total_images
    iou_class = total_iou_cl/total_images
    return miou, iou_class

def test(model, test_loader, loss_fn, num_clasess, clear_memory_every):
    model.eval()
    test_loss = 0
    correct = 0
    total_images = 0
    total_iou = 0
    total_iou_cl = np.zeros([1,num_classes])
    
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            
            #Ridimensioning tensor
            targets = targets.squeeze(dim=1)
            targets = targets.long()
            
            loss = loss_fn(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            
            iou, iou_cl = meanIOU(num_classes, predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
            total_iou += iou
            total_iou_cl += iou_cl
            total_images += len(targets)
            
            # Clear GPU memory periodically
            if clear_memory_every!=0 and batch_idx % clear_memory_every == 0:
                clear_gpu_memory()

    miou = total_iou/total_images
    iou_class = total_iou_cl/total_images
    return miou, iou_class

## Training

In [None]:
# Set the random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [None]:
for epoch in range(num_epochs):
    #poly_lr_scheduler(optimizer, 2.5e-2, epoch, lr_decay_iter=1,
    #                  max_iter=num_epochs, power=0.9)
    train(model, optimizer, dataloader_train, loss_fn, 19, 0)
    val_mIOU,_ = test(model, dataloader_val, loss_fn, 19, 0)
    print(f"epoch: {epoch}, Validation IOU: {val_mIOU:.2f}")

    torch.save({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'val_IOU': val_mIOU
    },"checkpoint.pth.tar")

print(f"Final mIOU: {val_mIOU:.2f}")

flops = Flops(model, 1024, 512, device)

print(f"Number of flops?: {flops}")

latency = Latency_FPS(model, 1024, 512, device)

print(f"Latency: {latency}")

#print(f"number of parameters: {model.count_params()}")

# Access the actual model being parallelized
actual_model = model.module
# Count the parameters of the actual model
num_params = count_params(actual_model)
print(f"number of parameters: {num_params}")