In [1]:
#Mounting Google Drive from Google Colab
#(Colab-only step, currently disabled: uncomment the two lines below when running on Colab)
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#Changing the current working directory to the Google Drive
#(currently disabled: the path below lives under the Colab Drive mount point /content/drive)
#%cd /content/drive/My Drive/MLDL2024_project1-Enrico

In [3]:
#Importing the necessary libraries
import os
import torch
import numpy as np
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets.cityscapes import CityscapesCustom
from models.deeplabv2.deeplabv2 import get_deeplab_v2
from models.bisenet.build_bisenet import BiSeNet
from train import train_model
from utils import test_latency_FPS
from utils import test_FLOPs_params

In [4]:
#Device-agnostic setup: use CUDA when PyTorch can see a GPU, otherwise run on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#Training parameters shared by every experiment in this notebook
#NOTE(review): an 8x16 input is extremely small for Cityscapes images - presumably a
#smoke-test setting; confirm the intended resolution before a real training run
cityscapes_height = 8
cityscapes_width = 16
train_batch_size, n_epochs = 4, 10

In [5]:
#Create Dataloaders for Cityscapes
#The dataset root is resolved relative to the parent of the current working directory
cityscapes_dir = f"{os.path.dirname(os.getcwd())}/Cityscapes/Cityspaces/"

#'train' split for training, 'val' split used as the test set; both are passed the configured height and width
cityscapes_train_dataset = CityscapesCustom(cityscapes_dir, 'train', cityscapes_height, cityscapes_width)
cityscapes_test_dataset = CityscapesCustom(cityscapes_dir, 'val', cityscapes_height, cityscapes_width)

#Only the training loader shuffles; both loaders use the training batch size
cityscapes_train_dataloader = DataLoader(
    dataset=cityscapes_train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
)
cityscapes_test_dataloader = DataLoader(
    dataset=cityscapes_test_dataset,
    batch_size=train_batch_size,
    shuffle=False,
)

#Report how many images and batches each split contains
print(f'Train: {len(cityscapes_train_dataset)} images, divided into {len(cityscapes_train_dataloader)} batches of size {cityscapes_train_dataloader.batch_size}')
print(f'Test: {len(cityscapes_test_dataset)} images, divided into {len(cityscapes_test_dataloader)} batches of size {cityscapes_test_dataloader.batch_size}')

Train: 1572 images, divided into 393 batches of size 4
Test: 500 images, divided into 125 batches of size 4


In [6]:
#Testing Classic semantic segmentation network

#Set the manual seeds BEFORE the model is built: seeding afterwards leaves any
#randomly initialised layers dependent on the kernel's previous RNG state, so a
#fresh "Restart & Run All" would not reproduce the same run
torch.manual_seed(42)
#Safe to call without a GPU (it is then silently ignored)
torch.cuda.manual_seed(42)

#Set up the model with the pretrained weights
DeepLabV2_model = get_deeplab_v2().to(device)

#Set up the loss function and the optimizer
#Targets equal to 255 are excluded from the loss (ignore_index)
DeepLabV2_criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
DeepLabV2_optimizer = optim.Adam(DeepLabV2_model.parameters(), lr=1e-3)

Deeplab pretraining loading...


In [7]:
#Train DeepLabV2 for n_epochs epochs (lr_schedule flag switched off for this run)
train_model(
    DeepLabV2_model,
    DeepLabV2_criterion,
    DeepLabV2_optimizer,
    cityscapes_train_dataloader,
    cityscapes_test_dataloader,
    device,
    n_epochs,
    lr_schedule=False,
)

Epoch 1/10, Train IoU: 51.69% (916/1772), Test IoU: 70.70% (625/884) [0m 14s]
Epoch 2/10, Train IoU: 60.50% (1020/1686), Test IoU: 57.58% (509/884) [0m 14s]
Epoch 3/10, Train IoU: 58.40% (984/1685), Test IoU: 60.52% (535/884) [0m 13s]
Epoch 4/10, Train IoU: 68.13% (1152/1691), Test IoU: 73.98% (654/884) [0m 15s]
Epoch 5/10, Train IoU: 68.32% (1227/1796), Test IoU: 77.15% (682/884) [0m 14s]
Epoch 6/10, Train IoU: 72.61% (1111/1530), Test IoU: 75.68% (669/884) [0m 15s]
Epoch 7/10, Train IoU: 70.13% (1214/1731), Test IoU: 78.39% (693/884) [0m 15s]
Epoch 8/10, Train IoU: 72.89% (1140/1564), Test IoU: 76.02% (672/884) [0m 15s]
Epoch 9/10, Train IoU: 73.58% (1245/1692), Test IoU: 79.75% (705/884) [0m 14s]
Epoch 10/10, Train IoU: 72.03% (1092/1516), Test IoU: 72.40% (640/884) [0m 15s]
Best IoU: 79.75% at epoch 9


In [8]:
#Benchmark DeepLabV2 at the configured input size and print each result:
#first latency and FPS, then FLOPs and number of parameters
for benchmark in (test_latency_FPS, test_FLOPs_params):
    print(benchmark(DeepLabV2_model, device, cityscapes_height, cityscapes_width))

1000 1000
Mean latency: 0.0515 +/- 0.0180 seconds 
Mean FPS: 20.26 +/- 2.66 frames per second
| module                         | #parameters or shape   | #flops     |
|:-------------------------------|:-----------------------|:-----------|
| model                          | 43.901M                | 0.266G     |
|  conv1                         |  9.408K                |  0.301M    |
|   conv1.weight                 |   (64, 3, 7, 7)        |            |
|  bn1                           |  0.128K                |  4.096K    |
|   bn1.weight                   |   (64,)                |            |
|   bn1.bias                     |   (64,)                |            |
|  layer1                        |  0.216M                |  3.237M    |
|   layer1.0                     |   75.008K              |   1.125M   |
|    layer1.0.conv1              |    4.096K              |    61.44K  |
|    layer1.0.bn1                |    0.128K              |    1.92K   |
|    layer1.0.conv2           

In [9]:
#Testing Real-Time semantic segmentation network

#Set the manual seeds BEFORE the model is built: seeding afterwards makes any
#random layer initialisation depend on whatever RNG state earlier cells left
#behind, so the experiment would not be reproducible on its own
torch.manual_seed(42)
#Safe to call without a GPU (it is then silently ignored)
torch.cuda.manual_seed(42)

#Set up the model with the pretrained weights
BiSeNet_model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

#Set up the loss function and the optimizer for BiSeNet
#Targets equal to 255 are excluded from the loss (ignore_index)
BiSeNet_criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
#Use the `optim` alias imported at the top of the notebook, as the DeepLabV2 cell does
BiSeNet_optimizer = optim.SGD(BiSeNet_model.parameters(), lr=2.5e-2, momentum=0.9, weight_decay=1e-4)

In [10]:
#Train BiSeNet for n_epochs epochs (lr_schedule flag switched on for this run)
train_model(
    BiSeNet_model,
    BiSeNet_criterion,
    BiSeNet_optimizer,
    cityscapes_train_dataloader,
    cityscapes_test_dataloader,
    device,
    n_epochs,
    lr_schedule=True,
)

Epoch 1/10, Train IoU: 23.53% (417/1772), Test IoU: 47.96% (424/884) [0m 9s]
Epoch 2/10, Train IoU: 39.15% (660/1686), Test IoU: 38.24% (338/884) [0m 8s]
Epoch 3/10, Train IoU: 36.56% (616/1685), Test IoU: 44.23% (391/884) [0m 8s]
Epoch 4/10, Train IoU: 38.91% (658/1691), Test IoU: 41.18% (364/884) [0m 8s]
Epoch 5/10, Train IoU: 41.31% (742/1796), Test IoU: 46.95% (415/884) [0m 9s]
Epoch 6/10, Train IoU: 41.83% (640/1530), Test IoU: 47.85% (423/884) [0m 8s]
Epoch 7/10, Train IoU: 40.50% (701/1731), Test IoU: 49.43% (437/884) [0m 8s]
Epoch 8/10, Train IoU: 40.15% (628/1564), Test IoU: 48.42% (428/884) [0m 8s]
Epoch 9/10, Train IoU: 42.79% (724/1692), Test IoU: 47.85% (423/884) [0m 9s]
Epoch 10/10, Train IoU: 40.63% (616/1516), Test IoU: 44.68% (395/884) [0m 9s]
Best IoU: 49.43% at epoch 7


In [11]:
#Benchmark BiSeNet at the configured input size and print each result:
#first latency and FPS, then FLOPs and number of parameters
for benchmark in (test_latency_FPS, test_FLOPs_params):
    print(benchmark(BiSeNet_model, device, cityscapes_height, cityscapes_width))

998 998
Mean latency: 0.0098 +/- 0.0070 seconds 
Mean FPS: 116.80 +/- 60.75 frames per second
| module                                      | #parameters or shape   | #flops     |
|:--------------------------------------------|:-----------------------|:-----------|
| model                                       | 12.582M                | 15.019M    |
|  saptial_path                               |  0.371M                |  1.242M    |
|   saptial_path.convblock1                   |   1.856K               |   59.392K  |
|    saptial_path.convblock1.conv1            |    1.728K              |    55.296K |
|    saptial_path.convblock1.bn               |    0.128K              |    4.096K  |
|   saptial_path.convblock2                   |   73.984K              |   0.592M   |
|    saptial_path.convblock2.conv1            |    73.728K             |    0.59M   |
|    saptial_path.convblock2.bn               |    0.256K              |    2.048K  |
|   saptial_path.convblock3                   