In [1]:
#Mounting Google Drive from Google Colab
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#Changing the current working directory to the Google Drive
#%cd /content/drive/My Drive/MLDL2024_project1-Enrico

In [3]:
#!pip install -U fvcore

In [4]:
#Importing the necessary libraries
import os
import torch
import numpy as np
from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader
from datasets.gta5 import GTA5Custom
from datasets.cityscapes import CityscapesCustom
from models.bisenet.build_bisenet import BiSeNet
from models.discriminator.discriminator import FCDiscriminator
#from train_adversarial import train_adversarial
from utils import test_latency_FPS, test_FLOPs_params, plot_miou_over_epochs

In [5]:
#Set device agnostic code
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

#Set the manual seeds
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

#Set training parameters
gta5_height, gta5_width = (8, 16)
gta5_batch_size = 8

cityscapes_height, cityscapes_width = (8, 16)
cityscapes_batch_size = 8

n_epochs = 10

lambda_adv = 0.001

#Set up the interpolation layers
interp_s = nn.Upsample(size=(gta5_height, gta5_width), mode='bilinear')
interp_t = nn.Upsample(size=(cityscapes_height, cityscapes_width), mode='bilinear')

Using device: cpu


In [6]:
#Create Dataloaders for Cityscapes and GTA5
gta5_dir = os.path.dirname(os.getcwd()) + '/GTA5/GTA5/'
cityscapes_dir = os.path.dirname(os.getcwd()) + '/Cityscapes/Cityspaces/'

augment1 = transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3)
gta5_train_dataset_aug1 = GTA5Custom(gta5_dir, gta5_height, gta5_width, augment=augment1)
cityscapes_test_dataset = CityscapesCustom(cityscapes_dir, 'val', cityscapes_height, cityscapes_width)

gta5_train_dataloader_aug1 = DataLoader(gta5_train_dataset_aug1, gta5_batch_size, shuffle=True)
cityscapes_test_dataloader = DataLoader(cityscapes_test_dataset, cityscapes_batch_size, shuffle=False)

#Get the class names
class_names = cityscapes_test_dataset.get_class_names()

print(f'GTA5 (Train): {len(gta5_train_dataset_aug1)} images, divided into {len(gta5_train_dataloader_aug1)} batches of size {gta5_train_dataloader_aug1.batch_size}')
print(f'Cityscapes (Test): {len(cityscapes_test_dataset)} images, divided into {len(cityscapes_test_dataloader)} batches of size {cityscapes_test_dataloader.batch_size}')

GTA5 (Train): 2500 images, divided into 313 batches of size 8
Cityscapes (Test): 500 images, divided into 63 batches of size 8


In [7]:
#Set up the segmentation network (our Generator) with the pretrained weights
g_model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

#Set up the loss function and the optimizer for the Generator
g_criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
g_optimizer = torch.optim.SGD(g_model.parameters(), lr=2.5e-2, momentum=0.9, weight_decay=1e-4)

In [8]:
#Set up the Discriminator
d_model = FCDiscriminator(num_classes=19).to(device)

#Set up the loss function and the optimizer for the Discriminator
d_criterion = torch.nn.BCEWithLogitsLoss()
d_optimizer = torch.optim.Adam(d_model.parameters(), lr=1e-4, betas=(0.9, 0.99))

In [10]:
#Training and validation loops
import torch
import time
import numpy as np
from tqdm import tqdm
from torch.nn import functional as F
from utils import poly_lr_scheduler, fast_hist, per_class_iou

def train_adversarial(g_model, d_model, g_criterion, d_criterion, g_optimizer, d_optimizer, lambda_adv, s_dataloader, t_dataloader, interp_s, interp_t, class_names, device, n_epochs, model_name):
    n_classes = len(class_names)
    #g_initial_lr = g_optimizer.param_groups[0]['lr']
    #d_initial_lr = d_optimizer.param_groups[0]['lr']
    best_miou = 0.0
    best_class_iou = np.zeros(n_classes)
    best_epoch = 0
    all_train_miou = []
    all_test_miou = []

    #Labels for adversarial training
    source_label = torch.ones((s_dataloader.batch_size, 1, 1, 1)).to(device)#0
    target_label = torch.zeros((t_dataloader.batch_size, 1, 1, 1)).to(device)#1

    for epoch in range(n_epochs):

        start = time.time()

        g_model.train()
        d_model.train()
        train_hist = np.zeros((n_classes, n_classes))
        train_loop = tqdm(zip(s_dataloader, t_dataloader), total=len(s_dataloader)+len(t_dataloader), leave=False)
        for (source_data, source_labels), (target_data, _) in train_loop:
            source_data, source_labels = source_data.to(device), source_labels.to(device)
            target_data = target_data.to(device)

            g_optimizer.zero_grad()
            d_optimizer.zero_grad()

            #TRAIN G

            #Do not accumulate gradients for the Discriminator
            for param in d_model.parameters():
                param.requires_grad = False

            #Train with source
            g_source_output, _, _ = g_model(source_data)
            print(f'gsource: {g_source_output.shape}, {g_source_output.dtype}')
            #g_source_output = interp_s(g_source_output)

            g_loss_seg = g_criterion(g_source_output, source_labels)
            g_loss_seg.backward()

            #Train with target
            g_target_output, _, _ = g_model(target_data)
            print(f'gtarget: {g_target_output.shape}, {g_target_output.dtype}')
            #g_target_output = interp_t(g_target_output)

            d_out = d_model(F.softmax(g_target_output))
            print(f'dout: {d_out.shape}, {d_out.dtype}')

            loss_adv_t = d_criterion(d_out, source_label)
            loss_d = lambda_adv * loss_adv_t
            loss_d.backward()

            #TRAIN D

            ###CONTINUE HERE###


            #Compute the adversarial loss
            d_source_output = d_model(F.softmax(g_source_output))
            print(f'dsource: {d_source_output.shape}, {d_source_output.dtype}')
            d_target_output = d_model(F.softmax(g_target_output))
            print(f'dtarget: {d_target_output.shape}, {d_target_output.dtype}')
            
            d_loss = d_criterion(d_source_output, torch.ones_like(d_source_output)) + d_criterion(d_target_output, torch.zeros_like(d_target_output))
            g_loss = g_loss_seg + lambda_adv * d_loss

            #Backward pass
            g_loss.backward()
            #poly_lr_scheduler(g_optimizer, init_lr=g_initial_lr, iter=epoch, max_iter=n_epochs)
            g_optimizer.step()

            d_loss.backward()
            #poly_lr_scheduler(d_optimizer, init_lr=d_initial_lr, iter=epoch, max_iter=n_epochs)
            d_optimizer.step()

            predictions = torch.argmax(g_source_output, dim=1)

            train_hist += fast_hist(source_labels.cpu().numpy(), predictions.cpu().numpy(), n_classes)
            train_loop.set_description(f'Epoch {epoch+1}/{n_epochs} (Train)')

        train_class_iou = 100*per_class_iou(train_hist)
        train_miou = np.mean(train_class_iou)
        all_train_miou.append(train_miou)

        g_model.eval()
        test_hist = np.zeros((n_classes, n_classes))
        test_loop = tqdm(enumerate(t_dataloader), total=len(t_dataloader), leave=False)
        with torch.no_grad():
            for i, (target_data, target_labels) in test_loop:
                target_data, target_labels = target_data.to(device), target_labels.to(device)

                g_target_output = g_model(target_data)

                predictions = torch.argmax(g_target_output, dim=1)
                test_hist += fast_hist(target_labels.cpu().numpy(), predictions.cpu().numpy(), n_classes)
                test_loop.set_description(f'Epoch {epoch+1}/{n_epochs} (Test)')

        test_class_iou = 100*per_class_iou(test_hist)
        test_miou = np.mean(test_class_iou)
        all_test_miou.append(test_miou)

        #Create a checkpoint dictionary
        checkpoint = {
            'epoch': epoch+1,
            'model_state_dict': g_model.state_dict(),
            'optimizer_state_dict': g_optimizer.state_dict(),
            'train_class_iou': train_class_iou,
            'train_miou': train_miou,
            'test_class_iou': test_class_iou,
            'test_miou': test_miou,
        }

        #torch.save(checkpoint, f'checkpoints/{model_name}_checkpoint_epoch_{epoch+1}.pth')
        
        #Early stopping condition
        if test_miou > best_miou:
            best_miou = test_miou
            best_class_iou = test_class_iou
            best_epoch = epoch
            #torch.save(checkpoint, f'checkpoints/{model_name}_best_epoch_{epoch+1}.pth')

        end = time.time()

        print(f'\nEpoch {epoch+1}/{n_epochs} [{(end-start) // 60:.0f}m {(end-start) % 60:.0f}s]: Train mIoU={train_miou:.2f}%, Test mIoU={test_miou:.2f}%')
        for class_name, iou in zip(class_names, test_class_iou):
            print(f'{class_name}: {iou:.2f}%', end=' ')

    print(f'\nBest mIoU={best_miou:.2f}% at epoch {best_epoch+1}')
    for class_name, iou in zip(class_names, best_class_iou):
        print(f'{class_name}: {iou:.2f}%', end=' ')

    return all_train_miou, all_test_miou, best_epoch

In [11]:
all_train_miou, all_test_miou, best_epoch = train_adversarial(g_model, d_model, g_criterion, d_criterion,
                                                              g_optimizer, d_optimizer, lambda_adv,
                                                              gta5_train_dataloader_aug1, cityscapes_test_dataloader,
                                                              interp_s, interp_t,
                                                              class_names, device, n_epochs, 'BiSeNet_adversarial')

plot_miou_over_epochs(all_train_miou, all_test_miou, best_epoch, 'BiSeNet_adversarial')

  0%|          | 0/376 [00:00<?, ?it/s]

gsource: torch.Size([8, 19, 8, 16]), torch.float32
gtarget: torch.Size([8, 19, 8, 16]), torch.float32


                                       

RuntimeError: Calculated padded input size per channel: (3 x 4). Kernel size: (4 x 4). Kernel size can't be greater than actual input size