In [None]:
import os
import random

import albumentations as A
import cv2
import torch
import wandb
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
from dotenv import load_dotenv
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm.notebook import tqdm

from datasets import GTA5, CityScapes
from models.bisenet.build_bisenet import BiSeNet
from models.deeplabv2.deeplabv2 import get_deeplab_v2
from train import train
from utils import (
    poly_lr_scheduler, fast_hist, per_class_iou, compute_flops, 
    get_latency_and_fps, save_results, plot_loss, plot_mIoU, plot_IoU
)

# Load environment variables
load_dotenv()
api_key = os.getenv('WANDB_API_KEY')
wandb.login(key=api_key)

# Set random seed for reproducibility
torch.cuda.manual_seed(42)

# Mapping from class IDs to labels
id_to_label = {
    0: 'road', 1: 'sidewalk', 2: 'building', 3: 'wall', 4: 'fence',
    5: 'pole', 6: 'light', 7: 'sign', 8: 'vegetation', 9: 'terrain',
    10: 'sky', 11: 'person', 12: 'rider', 13: 'car', 14: 'truck',
    15: 'bus', 16: 'train', 17: 'motorcycle', 18: 'bicycle', 255: 'unlabeled'
}


In [None]:
BATCH_SIZE = 6
NC=19
NUM_WORKERS = 8
cityscape_size = (256,512)
GTA5_size = (256,512)
transform_cityscapes_image = transforms.Compose([
    transforms.Resize(cityscape_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
transform_gta5_image = transforms.Compose([
    transforms.Resize(GTA5_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_gta5_label = transforms.Compose([
    transforms.Resize(GTA5_size, interpolation=Image.NEAREST),
    transforms.Lambda(lambda img: torch.tensor(np.array(img, dtype=np.uint8)))
])
transform_cityscapes_label = transforms.Compose([
    transforms.Resize(cityscape_size, interpolation=Image.NEAREST),
    transforms.Lambda(lambda img: torch.tensor(np.array(img, dtype=np.uint8)))
])

cityscapes_train_dataset = CityScapes('./Cityscapes', 'train', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
cityscapes_test_dataset = CityScapes('./Cityscapes', 'val', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
GTA5_dataset = GTA5('./GTA5', transform_image=transform_gta5_image , transform_label=transform_gta5_label)


cityscapes_train_dataloader = DataLoader(cityscapes_train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
cityscapes_test_dataloader = DataLoader(cityscapes_test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
GTA5_dataloader = DataLoader(GTA5_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)



## 2.1 DeepLabV2

In [None]:

loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
init_lr = 0.001 #0.0001
model_deeplab = get_deeplab_v2(num_classes=19, pretrain=True, pretrain_model_path='./models/deeplab_resnet_pretrained_imagenet.pth').cuda()
optimizer_deeplab = torch.optim.Adam(model_deeplab.parameters(), lr=init_lr)

deeplab_result = train( model_deeplab,
                        loss_fn, optimizer_deeplab,
                        cityscapes_train_dataloader,
                        GTA5_dataloader, 
                        5,
                        'step2_DeepLabV2')
save_results(model_deeplab, deeplab_result, "deeplab_performance_metrics_2_1", height=256, width=512, iterations=10)
plot_loss(deeplab_result, "DeepLabV2", "step2.1", "CityScapes", "CityScapes")
plot_mIoU(deeplab_result, "DeepLabV2", "step2.1", "CityScapes", "CityScapes")
plot_IoU(deeplab_result, "DeepLabV2", "step2.1", "CityScapes", "CityScapes")

torch.save(model_deeplab.state_dict(), "./checkpoints/deeplab_model.pth")
# model_deeplab.load_state_dict(torch.load("./checkpoints/deeplab_model.pth"))
# model_deeplab.eval()


## 2.2 BiSeNet


In [None]:
from tqdm.notebook import tqdm
from models.bisenet.build_bisenet import BiSeNet
model_bisenet = BiSeNet(20, 'resnet18').cuda()
from utils import poly_lr_scheduler, fast_hist, per_class_iou
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
init_lr = 0.001 #0.0001
optimizer_bisenet = torch.optim.Adam(model_bisenet.parameters(), lr=init_lr)
bisenet_result_2_2 = train(model_bisenet, loss_fn, optimizer_bisenet,cityscapes_train_dataloader,cityscapes_test_dataloader, 5,'step2_BiseNet')
save_results(model_bisenet, bisenet_result_2_2, "bisenet_performance_metrics_2_2", height=cityscape_size[0], width=cityscape_size[1], iterations=10)

plot_loss(bisenet_result_2_2, "BiSeNet", "step2.2", "CityScapes", "CityScapes")
plot_mIoU(bisenet_result_2_2, "BiSeNet", "step2.2", "CityScapes", "CityScapes")
plot_IoU(bisenet_result_2_2, "BiSeNet", "step2.2", "CityScapes", "CityScapes")

torch.save(model_bisenet.state_dict(), "./checkpoints/bisenet_model.pth")
# model_bisenet.load_state_dict(torch.load("./checkpoints/bisenet_model.pth"))
# model_bisenet.eval()



## 3.1 BiSeNet

In [None]:
from tqdm.notebook import tqdm
from models.bisenet.build_bisenet import BiSeNet

def to_tensor_no_normalize(pil_image):
    # Apply the resize transformation to the image
    # Convert the PIL image to a tensor
    return torch.tensor(np.array(pil_image, dtype=np.uint8))

transform_cityscapes_image = transforms.Compose([
    transforms.Resize(cityscape_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
transform_gta5_image = transforms.Compose([
    transforms.Resize(GTA5_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_gta5_label = transforms.Compose([
    transforms.Resize(GTA5_size, interpolation=Image.NEAREST),
    transforms.Lambda(lambda img: to_tensor_no_normalize(img))
])
transform_cityscapes_label = transforms.Compose([
    transforms.Resize(cityscape_size, interpolation=Image.NEAREST),
    transforms.Lambda(lambda img: to_tensor_no_normalize(img))
])

cityscapes_train_dataset = CityScapes('./Cityscapes', 'train', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
cityscapes_test_dataset = CityScapes('./Cityscapes', 'val', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
GTA5_dataset = GTA5('./GTA5', transform_image=transform_gta5_image , transform_label=transform_gta5_label)


cityscapes_train_dataloader = DataLoader(cityscapes_train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
cityscapes_test_dataloader = DataLoader(cityscapes_test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
GTA5_dataloader = DataLoader(GTA5_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)


model_bisenet_3_1 = BiSeNet(20, 'resnet18').cuda()
from utils import poly_lr_scheduler, fast_hist, per_class_iou
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
init_lr = 0.001 #0.0001
optimizer_bisenet_3_1 = torch.optim.Adam(model_bisenet_3_1.parameters(), lr=init_lr)
bisenet_result_3_1 = train(model_bisenet_3_1, loss_fn, optimizer_bisenet_3_1,GTA5_dataloader,cityscapes_test_dataloader, 50,'step3_BiseNet')
save_results(model_bisenet_3_1, bisenet_result_3_1, "bisenet_performance_metrics_3_1", height=GTA5_size[0], width=GTA5_size[1], iterations=10)
plot_loss(bisenet_result_3_1, "BiSeNet", "step3.1", "GTA5", "CityScapes")
plot_mIoU(bisenet_result_3_1, "BiSeNet", "step3.1", "GTA5", "CityScapes")
plot_IoU(bisenet_result_3_1, "BiSeNet", "step3.1", "GTA5", "CityScapes")

torch.save(model_bisenet_3_1.state_dict(), "./checkpoints/bisenet_model_3_1.pth")
# model_bisenet_3_1.load_state_dict(torch.load("./checkpoints/bisenet_model_3_1.pth"))
# model_bisenet_3_1.eval()


## 3.2 BiSeNet with augmentation



In [None]:
import albumentations as A
from tqdm.notebook import tqdm
from models.bisenet.build_bisenet import BiSeNet


augmentations = {
    'transform1': A.Compose([
        A.Resize(GTA5_size),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'transform2': A.Compose([
        A.Resize(GTA5_size),
        A.HueSaturationValue(p=0.5),
        A.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 1), p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'transform3': A.Compose([
        A.Resize(GTA5_size),
        A.HorizontalFlip(p=0.5),
        A.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 1), p=0.5),
        A.GaussNoise(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'transform4': A.Compose([
        A.Resize(GTA5_size),
        A.HorizontalFlip(p=0.5),
        A.HueSaturationValue(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'transform5': A.Compose([
        A.Resize(GTA5_size),
        A.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 1), p=0.5),
        A.GaussNoise(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'transform6': A.Compose([
        A.Resize(GTA5_size),
        A.HorizontalFlip(p=0.5),
        A.GaussNoise(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.HueSaturationValue(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}

best_score = 0
best = ''
for key, value in augmentations.items():
    cityscapes_train_dataset = CityScapes('./Cityscapes', 'train', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
    cityscapes_test_dataset = CityScapes('./Cityscapes', 'val', transform_image=transform_cityscapes_image , transform_label=transform_cityscapes_label)
    GTA5_dataset = GTA5('./GTA5', transform_image=transform_gta5_image , transform_label=transform_gta5_label, augmentations=value)


    cityscapes_train_dataloader = DataLoader(cityscapes_train_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    cityscapes_test_dataloader = DataLoader(cityscapes_test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    GTA5_dataloader = DataLoader(GTA5_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    
    model_bisenet_3_2 = BiSeNet(20, 'resnet18').cuda()
    init_lr = 0.001 #0.0001
    loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
    optimizer = torch.optim.Adam(model_bisenet_3_2.parameters(), lr=init_lr)

    bisenet_result3_2 = train(model_bisenet_3_2, loss_fn, optimizer,GTA5_dataloader,cityscapes_test_dataloader, 50,'step3_BiseNet')
    
        
    save_results(model_bisenet_3_2, bisenet_result3_2, "bisenet_performance_metrics_3_1_{key}", height=cityscape_size[0], width=cityscape_size[1], iterations=10)
    plot_loss(bisenet_result3_2, "BiSeNet", "step3.1", "GTA5", "CityScapes")
    plot_mIoU(bisenet_result3_2, "BiSeNet", "step3.1", "GTA5", "CityScapes")
    plot_IoU(bisenet_result3_2, "BiSeNet", "step3.1", "GTA5", "CityScapes")
    torch.save(model_bisenet_3_2.state_dict(), f"./checkpoints/bisenet_model_3_1_{key}.pth")
    if best_score < bisenet_result3_2[3][-1]:
        best_score = bisenet_result3_2[3][-1]
        best = key
best        

## 4.1 FDA

In [None]:
GTA5_dataset = GTA5('./GTA5', augmentations=augmentations[best], FDA = 0.09)
GTA5_dataloader = DataLoader(GTA5_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
model_bisenet_4_1_FDA = BiSeNet(20, 'resnet18').cuda()
init_lr = 0.001 #0.0001
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=255)
optimizer = torch.optim.Adam(model_bisenet_4_1_FDA.parameters(), lr=init_lr)

bisenet_result4_1_FDA = train(model_bisenet_4_1_FDA, loss_fn, optimizer,GTA5_dataloader,cityscapes_test_dataloader, 50,'step3_BiseNet')

    
save_results(model_bisenet_4_1_FDA, bisenet_result4_1_FDA, "bisenet_performance_metrics_4_1_FDA", height=cityscape_size[0], width=cityscape_size[1], iterations=10)
plot_loss(bisenet_result4_1_FDA, "BiSeNet", "step4.1_FDA", "GTA5", "CityScapes")
plot_mIoU(bisenet_result4_1_FDA, "BiSeNet", "step4.1_FDA", "GTA5", "CityScapes")
plot_IoU(bisenet_result4_1_FDA, "BiSeNet", "step4.1_FDA", "GTA5", "CityScapes")
torch.save(model_bisenet_4_1_FDA.state_dict(), f"./checkpoints/bisenet_model_4_1_FDA.pth")
