In [1]:
#Mounting Google Drive from Google Colab
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#Changing the current working directory to the Google Drive
#%cd /content/drive/My Drive/MLDL2024_project1-Enrico

In [3]:
#Importing the necessary libraries
import os
import torch
import numpy as np
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets.cityscapes import CityscapesCustom
from models.deeplabv2.deeplabv2 import get_deeplab_v2
from models.bisenet.build_bisenet import BiSeNet
from train import train_model
from utils import test_latency_FPS
from utils import test_FLOPs_params

In [4]:
#Set device agnostic code
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
#Create Dataloaders for Cityscapes
cityscapes_dir = os.path.dirname(os.getcwd()) + '/Cityscapes/Cityspaces/'

cityscapes_train_dataset = CityscapesCustom(cityscapes_dir, 'train', height=16, width=32)
cityscapes_test_dataset = CityscapesCustom(cityscapes_dir, 'val', height=16, width=32)

cityscapes_train_dataloader = DataLoader(cityscapes_train_dataset, batch_size=4, shuffle=True)
cityscapes_test_dataloader = DataLoader(cityscapes_test_dataset, batch_size=4, shuffle=False)

print(f'Train: {len(cityscapes_train_dataset)} images, divided into {len(cityscapes_train_dataloader)} batches of size {cityscapes_train_dataloader.batch_size}')
print(f'Test: {len(cityscapes_test_dataset)} images, divided into {len(cityscapes_test_dataloader)} batches of size {cityscapes_test_dataloader.batch_size}')

Train: 1572 images, divided into 393 batches of size 4
Test: 500 images, divided into 125 batches of size 4


In [7]:
#Testing Classic semantic segmentation network

#Set up the model with the pretrained weights
DeepLabV2_model = get_deeplab_v2().to(device)

#Set up the loss function and the optimizer
criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.Adam(DeepLabV2_model.parameters(), lr=1e-3)

#Set the manual seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

Deeplab pretraining loading...


In [8]:
#Train the model
train_model(DeepLabV2_model, criterion, optimizer, cityscapes_train_dataloader, cityscapes_test_dataloader, device, n_epochs=10)

Epoch 1/10, IoU: 44.60%, Intersection: 1263, Union: 2832
Epoch 2/10, IoU: 47.95%, Intersection: 1358, Union: 2832
Epoch 3/10, IoU: 51.38%, Intersection: 1455, Union: 2832
Epoch 4/10, IoU: 52.86%, Intersection: 1497, Union: 2832
Epoch 5/10, IoU: 50.56%, Intersection: 1432, Union: 2832
Epoch 6/10, IoU: 50.49%, Intersection: 1430, Union: 2832
Epoch 7/10, IoU: 52.05%, Intersection: 1474, Union: 2832
Epoch 8/10, IoU: 52.93%, Intersection: 1499, Union: 2832
Epoch 9/10, IoU: 56.18%, Intersection: 1591, Union: 2832
Epoch 10/10, IoU: 56.78%, Intersection: 1608, Union: 2832


In [9]:
#Test latency and FPS
print(test_latency_FPS(DeepLabV2_model, height=16, width=32, device=device))

#Test FLOPs and number of parameters
print(test_FLOPs_params(DeepLabV2_model, height=16, width=32, device=device))

Mean latency: 0.0741 +/- 0.0110 seconds 
Mean FPS: 13.79 +/- 2.12 frames per second
| module                         | #parameters or shape   | #flops     |
|:-------------------------------|:-----------------------|:-----------|
| model                          | 43.901M                | 0.666G     |
|  conv1                         |  9.408K                |  1.204M    |
|   conv1.weight                 |   (64, 3, 7, 7)        |            |
|  bn1                           |  0.128K                |  16.384K   |
|   bn1.weight                   |   (64,)                |            |
|   bn1.bias                     |   (64,)                |            |
|  layer1                        |  0.216M                |  9.711M    |
|   layer1.0                     |   75.008K              |   3.375M   |
|    layer1.0.conv1              |    4.096K              |    0.184M  |
|    layer1.0.bn1                |    0.128K              |    5.76K   |
|    layer1.0.conv2              |    36

In [10]:
#Testing Real-Time semantic segmentation network

#Set up the model with the pretrained weights
BiSeNet_model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

#Set up the loss function and the optimizer
criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.Adam(BiSeNet_model.parameters(), lr=1e-3)

#Set the manual seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [11]:
#Train the model
train_model(BiSeNet_model, criterion, optimizer, cityscapes_train_dataloader, cityscapes_test_dataloader, device, n_epochs=10)

Epoch 1/10, IoU: 19.88%, Intersection: 563, Union: 2832
Epoch 2/10, IoU: 32.49%, Intersection: 920, Union: 2832
Epoch 3/10, IoU: 25.35%, Intersection: 718, Union: 2832
Epoch 4/10, IoU: 36.55%, Intersection: 1035, Union: 2832
Epoch 5/10, IoU: 39.62%, Intersection: 1122, Union: 2832
Epoch 6/10, IoU: 34.92%, Intersection: 989, Union: 2832
Epoch 7/10, IoU: 43.29%, Intersection: 1226, Union: 2832
Epoch 8/10, IoU: 43.50%, Intersection: 1232, Union: 2832
Epoch 9/10, IoU: 43.01%, Intersection: 1218, Union: 2832
Epoch 10/10, IoU: 48.66%, Intersection: 1378, Union: 2832


In [12]:
#Test latency and FPS
print(test_latency_FPS(BiSeNet_model, height=16, width=32, device=device))

#Test FLOPs and number of parameters
print(test_FLOPs_params(BiSeNet_model, height=16, width=32, device=device))

Mean latency: 0.0091 +/- 0.0017 seconds 
Mean FPS: 113.85 +/- 24.21 frames per second
| module                                      | #parameters or shape   | #flops     |
|:--------------------------------------------|:-----------------------|:-----------|
| model                                       | 12.582M                | 29.702M    |
|  saptial_path                               |  0.371M                |  4.968M    |
|   saptial_path.convblock1                   |   1.856K               |   0.238M   |
|    saptial_path.convblock1.conv1            |    1.728K              |    0.221M  |
|    saptial_path.convblock1.bn               |    0.128K              |    16.384K |
|   saptial_path.convblock2                   |   73.984K              |   2.367M   |
|    saptial_path.convblock2.conv1            |    73.728K             |    2.359M  |
|    saptial_path.convblock2.bn               |    0.256K              |    8.192K  |
|   saptial_path.convblock3                   |   0.29