<a href="https://colab.research.google.com/github/federico2879/MLDL2024_semantic_segmentation/blob/master/training/training-bisenet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the project repository; the cells below import its `datasets`, `models` and `train` modules.
!git clone https://github.com/federico2879/MLDL2024_semantic_segmentation.git

Cloning into 'MLDL2024_semantic_segmentation'...
remote: Enumerating objects: 359, done.[K
remote: Counting objects: 100% (138/138), done.[K
remote: Compressing objects: 100% (114/114), done.[K
remote: Total 359 (delta 74), reused 48 (delta 24), pack-reused 221[K
Receiving objects: 100% (359/359), 194.08 KiB | 2.70 MiB/s, done.
Resolving deltas: 100% (199/199), done.


In [None]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from MLDL2024_semantic_segmentation.datasets.cityscapes import CityScapes
from MLDL2024_semantic_segmentation.models.bisenet.build_bisenet import *


In [None]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Bare expression so the notebook displays the selected device.
device


In [None]:
# Disabled cell: everything below is a triple-quoted string, so nothing in it is
# executed and `clear_gpu_memory` is NOT defined by this notebook. The sketch
# empties the CUDA caching allocator and then runs the Python garbage collector.
'''
import gc

# Function to clear GPU memory
def clear_gpu_memory():
    torch.cuda.empty_cache()
    gc.collect()
    '''

In [None]:
# Disabled cell: the whole body is a string literal, so `fast_hist`,
# `per_class_iou` and `meanIOU` are not defined here. Presumably superseded by
# the helpers imported from `MLDL2024_semantic_segmentation.models.metrics` — confirm.
#
# What the disabled code does:
#   * fast_hist     - builds a num_classes x num_classes confusion matrix with
#                     np.bincount, keeping only pixels whose prediction is in range.
#   * per_class_iou - diag / (row sum + column sum - diag + epsilon) per class.
#   * meanIOU       - adds up the per-image mean IoU over the batch; the
#                     division by len(pred) is commented out, so it returns a
#                     sum, not a mean.
#
# NOTE(review): `meanIOU` declares its first parameter as `num_clasess` but the
# body reads `num_classes`, so if re-enabled it would silently use the
# notebook-level global instead of the argument.
'''
import time
import numpy as np
import statistics

def fast_hist(pred, target, num_classes):
    k = (pred >= 0) & (pred < num_classes)
    return np.bincount(num_classes * pred[k].astype(int) + target[k], minlength = num_classes**2).reshape(num_classes, num_classes)

def per_class_iou(hist):
    epsilon = 1e-5
    return (np.diag(hist)) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + epsilon)

def meanIOU(num_clasess, pred, target):
  mIOU = 0
  for i in range(len(pred)):
      hist = fast_hist(pred[i].cpu().numpy(), target[i].cpu().numpy(), num_classes)
      IOU = per_class_iou(hist)
      mIOU = mIOU + sum(IOU)/num_classes
  return mIOU #*100/len(pred)
'''

In [None]:
#!pip install -U fvcore


In [None]:
# Disabled cell: the body is a string literal, so this `Flops` / `Latency_FPS`
# pair is not defined here; the names called at the end of the notebook must
# come from the project imports — confirm which module provides them.
#
# What the disabled code does:
#   * Flops       - runs fvcore's FlopCountAnalysis on a zero image of shape
#                   (1, 3, height, width), prints the flop table and returns
#                   (flops, table).
#   * Latency_FPS - times 1000 forward passes of a random image with
#                   time.time() and returns mean/std of latency and FPS.
#
# NOTE(review): latency is scaled by 1000 (seconds -> milliseconds) and the same
# x1000 factor is applied to the FPS statistics, which looks unintended.
# NOTE(review): the timing has no torch.cuda.synchronize(); on a GPU this may
# measure kernel launch time rather than execution time — confirm.
'''
from fvcore.nn import FlopCountAnalysis, flop_count_table
import time
import numpy as np
import statistics

def Flops(model, height, width):
  model.eval()
  with torch.no_grad():
    image = torch.zeros((1, 3, height, width)).to(device)
    flops = FlopCountAnalysis(model, image)
  flops_CT = flop_count_table(flops)
  print(flops_CT)
  return flops, flops_CT

def Latency_FPS(model, height, width):
  image = torch.rand((1, 3, height, width)).to(device)
  iterations = 1000
  latency = []
  FPS = []
  model.eval()
  with torch.no_grad():
    for i in range(iterations):
      start = time.time()

      output = model(image)

      end = time.time()
      ltc_i = end-start
      latency.append(ltc_i)
      FPS_i = 1/ltc_i
      FPS.append(FPS_i)

  meanLatency = statistics.mean(latency)*1000
  stdLatency = statistics.stdev(latency)*1000
  meanFPS = statistics.mean(FPS)*1000
  stdFPS = statistics.stdev(FPS)*1000
  return meanLatency, stdLatency, meanFPS, stdFPS
  '''

In [None]:
from MLDL2024_semantic_segmentation.models.metrics import metric_pip_install

# Presumably installs the extra packages the metric helpers rely on
# (e.g. fvcore) — TODO confirm against models/metrics in the cloned repository.
metric_pip_install()



In [None]:
# Disabled cell: the body is a string literal, so this `train` / `test` pair is
# not defined here. The training loop at the end of the notebook calls
# `train(...)` / `test(...)` with a fifth positional argument, which these
# four-parameter sketches would not accept, so the live versions presumably
# come from `MLDL2024_semantic_segmentation.train` — confirm.
#
# What the disabled code does:
#   * train - one optimisation pass over `train_loader`; the loss and the
#             prediction are taken from `outputs[0]`.
#   * test  - one no-grad pass over `test_loader`; the loss and the prediction
#             are taken from `outputs` directly.
#   Both return total_iou / total_images and call `clear_gpu_memory()` every
#   `clear_memory_every` batches.
#
# NOTE(review): both functions depend on `meanIOU` and `clear_gpu_memory`,
# which are themselves only present in disabled string cells.
'''
clear_memory_every = 10

def train(model, optimizer, train_loader, loss_fn):
    model.train()
    running_loss = 0.0
    correct = 0
    total_iou = 0
    total_images = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):


        #print(f'batch id: {batch_idx}')
        #print(f'(inputs, targets): {(inputs.size(), targets.size())}')
        first_image = inputs[0]

        # Stampiamo le dimensioni della prima immagine nel batch
        #print("Dimensioni della prima immagine nel batch:", first_image.size())
        inputs, targets = inputs.cuda(), targets.cuda()

        # Compute prediction and loss
        outputs =  model(inputs)

        #Ridimensioning tensor
        targets = targets.squeeze(dim=1)
        #print(f'targets size: {targets.size()}')

        targets = targets.long()

        loss = loss_fn(outputs[0], targets)

        # Backpropagation
        optimizer.zero_grad() # reset gradients of parameters
        loss.backward()  # backpropagate the prediction loss
        optimizer.step() # update model

        running_loss += loss.item()
        _, predicted = outputs[0].max(1)
        #print(f'predicted: {predicted}')
        iou = meanIOU(outputs[0].size()[1], predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
        total_iou += iou.sum().item()  #somma di tytte le singole iou calcolate in precedenza
        total_images += len(targets)

        # Clear GPU memory periodically
        if batch_idx % clear_memory_every == 0:
            clear_gpu_memory()

    result= total_iou/total_images
    return result

def test(model, test_loader, loss_fn):
    model.eval()
    test_loss = 0
    correct = 0
    total_images = 0
    total_iou = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            #Ridimensioning tensor+

            targets = targets.squeeze(dim=1)

            #print(f'targets size: {targets.size()}')

            targets = targets.long()
            #print(f'targets type: {targets.type()}')
            #print(f'targets size: {targets.size()}')
            loss = loss_fn(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            #print(f'predicted: {predicted}')
            iou = meanIOU(outputs.size()[1], predicted, targets) #sum of meanIOU over classes di tutte le immagini nel batch
            #total += targets.size(0)
            #correct += predicted.eq(targets).sum().item()
            total_iou += iou.sum().item()  #somma di tytte le singole iou calcolate in precedenza

            #print(f'len di targets (=batch_size?): {len(targets)}')
            total_images += len(targets)

            # Clear GPU memory periodically
            if batch_idx % clear_memory_every == 0:
                clear_gpu_memory()

    result= total_iou/total_images
    #test_loss = test_loss / len(test_loader)
    #test_accuracy = 100. * correct / total
    return result
'''

In [None]:
from MLDL2024_semantic_segmentation.models.metrics import *
from MLDL2024_semantic_segmentation.train import *

In [None]:
# Fixed experiment settings shared by the cells below.
num_classes = 19  # number of output classes handed to the BiSeNet constructor
num_epochs = 1    # how many times the training loop iterates over the data

In [None]:
# Transformations
# Images: resize, convert to a float tensor, then normalise with the ImageNet
# channel statistics.
# NOTE(review): Resize takes (height, width), so (1024, 512) yields portrait
# 1024x512 tensors — confirm this is not meant to be (512, 1024).
transform_image = transforms.Compose([
    transforms.Resize((1024, 512)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
# Labels: resized to the same spatial size as the images.
# NOTE(review): Resize defaults to bilinear interpolation and ToTensor may
# rescale integer images; verify in the CityScapes dataset class that class ids
# survive this pipeline (nearest-neighbour resizing is the usual choice for masks).
transform_target = transforms.Compose([
    transforms.Resize((1024, 512)),
    transforms.ToTensor(),
])

# Create dataloader
# Single source of truth for the dataset location used by both splits.
cityscapes_root = '/kaggle/input/cityscapes/Cityscapes/Cityspaces'

dataset_train = CityScapes(cityscapes_root, split = 'train', transform = transform_image, label_transform = transform_target)
dataloader_train = DataLoader(dataset_train, batch_size=16, shuffle=True)

dataset_val = CityScapes(cityscapes_root, split = 'val', transform = transform_image, label_transform = transform_target)
# The validation loader must wrap the validation split so the reported IoU is
# measured on held-out data rather than on the training images.
dataloader_val = DataLoader(dataset_val, batch_size=16, shuffle=False)

In [None]:
# Initialization of the model
model = BiSeNet(num_classes=num_classes, context_path="resnet18").to(device)
# Use up to two GPUs, but only ones that actually exist: a hard-coded [0, 1]
# raises on a single-GPU runtime. With no GPU the list is empty and None is
# passed, letting DataParallel fall back to its own default. The wrapper is
# always applied so that `model.module` stays valid in later cells.
gpu_ids = list(range(min(2, torch.cuda.device_count())))
model = torch.nn.DataParallel(model, device_ids=gpu_ids or None).to(device)

In [None]:
# Optimisation setup: Adam over every model parameter, trained against a
# pixel-wise cross-entropy objective.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

In [None]:
def count_params(model):
    """Return the total number of elements across the model's trainable parameters.

    Args:
        model: any object exposing ``parameters()`` whose items provide
            ``requires_grad`` and ``numel()``.

    Returns:
        The summed element count of the parameters with ``requires_grad``
        set; ``0`` when there are none.
    """
    trainable_total = 0
    for param in model.parameters():
        # Frozen parameters are excluded from the count.
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


In [None]:
## Set the random seeds
# NOTE(review): the model is built in an earlier cell, so seeding here does not
# cover weight initialisation; it only affects what runs from this point on.
torch.manual_seed(42)
torch.cuda.manual_seed(42)

mIOU = 0

# Train for `num_epochs` epochs and validate after each one.
# NOTE(review): the trailing `10` is presumably the "clear GPU memory every N
# batches" interval of the imported train/test helpers — confirm in train.py.
for epoch in range(num_epochs):
    train(model, optimizer, dataloader_train, loss_fn, 10)
    mIOU = test(model, dataloader_val, loss_fn, 10)
    print(f"epoch: {epoch}, Validation IOU: {mIOU:.2f}")

    # Checkpointing is intentionally disabled; uncomment to save every epoch.
    # torch.save({
    #     'epoch': epoch + 1,
    #     'state_dict': model.state_dict(),
    #     'optimizer': optimizer.state_dict(),
    #     'val_IOU': mIOU
    # }, "checkpoint.pth.tar")
print(f"Final mIOU: {mIOU:.2f}")

# Computational cost of the trained model at the training input size.
flops = Flops(model, 1024, 512)

print(f"Number of flops?: {flops}")

latency = Latency_FPS(model, 1024, 512)

print(f"Latency: {latency}")

# `model` is a DataParallel wrapper, which has no `count_params` method, so the
# count is taken on the wrapped network through the notebook-level helper.
actual_model = model.module
# Count the parameters of the actual model
num_params = count_params(actual_model)
print(f"number of parameters: {num_params}")