In [3]:
!git clone https://github.com/federico2879/MLDL2024_semantic_segmentation.git

fatal: destination path 'MLDL2024_semantic_segmentation' already exists and is not an empty directory.


In [4]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
#from MLDL2024_semantic_segmentation.datasets.importDataset import Download
#from MLDL2024_semantic_segmentation.datasets.importDataset import Modified_CityScapes
from MLDL2024_semantic_segmentation.datasets.cityscapes import CityScapes
from MLDL2024_semantic_segmentation.models.bisenet.build_bisenet import *


In [5]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device


'cuda'

In [6]:
import gc

# Function to clear GPU memory
def clear_gpu_memory():
    torch.cuda.empty_cache()
    gc.collect()

In [7]:
import time
import numpy as np
import statistics

def fast_hist(pred, target, num_classes):
    k = (pred >= 0) & (pred < num_classes)
    return np.bincount(num_classes * pred[k].astype(int) + target[k], minlength = num_classes**2).reshape(num_classes, num_classes)

def per_class_iou(hist):
    epsilon = 1e-5
    return (np.diag(hist)) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + epsilon)

def meanIOU(num_clasess, pred, target):
  mIOU = 0
  for i in range(len(pred)):    
      hist = fast_hist(pred[i].cpu().numpy(), target[i].cpu().numpy(), num_classes)
      IOU = per_class_iou(hist)
      mIOU = mIOU + sum(IOU)/num_classes 
  return mIOU #*100/len(pred)

In [8]:
clear_memory_every = 10

def train(model, optimizer, train_loader, loss_fn):
    model.train()
    running_loss = 0.0
    correct = 0
    total_iou = 0
    total_images = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):


        #print(f'batch id: {batch_idx}')
        #print(f'(inputs, targets): {(inputs.size(), targets.size())}')
        first_image = inputs[0]

        # Stampiamo le dimensioni della prima immagine nel batch
        #print("Dimensioni della prima immagine nel batch:", first_image.size())
        inputs, targets = inputs.cuda(), targets.cuda()

        # Compute prediction and loss
        outputs =  model(inputs)
        '''
       # print(f'outputs[0]: {outputs[0]}')
        print(f'outputs[0] type: {outputs[0].type()}')
        print(f'outputs[0] size: {outputs[0].size()}')


        #print(f'targets: {targets}')
        print(f'targets type: {targets.type()}')
        print(f'targets size: {targets.size()}')
        '''
        #Ridimensioning tensor
        targets = targets.squeeze(dim=1)
        #print(f'targets size: {targets.size()}')

        targets = targets.long()

        loss = loss_fn(outputs[0], targets)

        # Backpropagation
        optimizer.zero_grad() # reset gradients of parameters
        loss.backward()  # backpropagate the prediction loss
        optimizer.step() # update model

        running_loss += loss.item()
        _, predicted = outputs[0].max(1)
        #print(f'predicted: {predicted}')
        iou = meanIOU(outputs[0].size()[1], predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
        total_iou += iou.sum().item()  #somma di tytte le singole iou calcolate in precedenza
        total_images += len(targets)
        
        # Clear GPU memory periodically
        if batch_idx % clear_memory_every == 0:
            clear_gpu_memory()

    result= total_iou/total_images
    return result

def test(model, test_loader, loss_fn):
    model.eval()
    test_loss = 0
    correct = 0
    total_images = 0
    total_iou = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            #Ridimensioning tensor+
            '''
            print(f'outputs: {outputs}')
            print(f'outputs type: {outputs.type()}')
            print(f'outputs size: {outputs.size()}')


            print(f'outputs[0]: {outputs[0]}')

            print(f'outputs[0] type: {outputs[0].type()}')
            print(f'outputs[0] size: {outputs[0].size()}')


            #pri nt(f'targets: {targets}')
            print(f'targets type: {targets.type()}')
            print(f'targets size: {targets.size()}')
            '''
            targets = targets.squeeze(dim=1)

            #print(f'targets size: {targets.size()}')

            targets = targets.long()
            #print(f'targets type: {targets.type()}')
            #print(f'targets size: {targets.size()}')
            loss = loss_fn(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            #print(f'predicted: {predicted}')
            iou = meanIOU(outputs.size()[1], predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
            #total += targets.size(0)
            #correct += predicted.eq(targets).sum().item()
            total_iou += iou.sum().item()  #somma di tytte le singole iou calcolate in precedenza

            #print(f'len di targets (=batch_size?): {len(targets)}')
            total_images += len(targets)
            
            # Clear GPU memory periodically
            if batch_idx % clear_memory_every == 0:
                clear_gpu_memory()

    result= total_iou/total_images
    #test_loss = test_loss / len(test_loader)
    #test_accuracy = 100. * correct / total
    return result

In [9]:
# Take dataset
#Download('drive/MyDrive/Cityscapes.zip', '')
#Modified_CityScapes('Cityscapes/Cityspaces')

In [10]:
# Setup fixed parameters
num_epochs = 5
num_classes = 19

In [11]:
# Transformations
transform_image = transforms.Compose([
    transforms.Resize((1024, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
transform_target = transforms.Compose([
    transforms.Resize((1024, 512)),
    transforms.ToTensor(),
])

# Create dataloader
dataset_train = CityScapes('/kaggle/input/cityscapes/Cityscapes/Cityspaces', split = 'train', transform = transform_image, label_transform = transform_target)
dataloader_train = DataLoader(dataset_train, batch_size=16, shuffle=True)

dataset_val = CityScapes('/kaggle/input/cityscapes/Cityscapes/Cityspaces', split = 'val', transform = transform_image, label_transform = transform_target)
dataloader_val = DataLoader(dataset_train, batch_size=16, shuffle=False)

In [12]:
# Inizialization of the model
model = BiSeNet(num_classes=num_classes, context_path="resnet18").to(device)
model = torch.nn.DataParallel(model, device_ids = [0,1]).to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 129MB/s] 
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 161MB/s]  


In [14]:
# Define loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [15]:
# Set the random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

for epoch in range(num_epochs):
    train(model, optimizer, dataloader_train, loss_fn)
    test_acc = test(model, dataloader_val, loss_fn)
    print(f"Test accuracy: {test_acc:.2f}")

Test accuracy: 0.08
Test accuracy: 0.09
Test accuracy: 0.09
Test accuracy: 0.09
Test accuracy: 0.09
