In [None]:
import torch
import torchvision
import torchvision.transforms as transform
import PIL
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import os
import cv2
import numpy as np
from torch.utils.data import Dataset
import matplotlib.colors as mcolors

In [None]:
import zipfile


# Location of the downloaded archive and the directory it is unpacked into
zip_file_path = '/home/ubuntu/Cityscapes.zip'

extraction_path = '/home/ubuntu/Cityscapes/'

# Unpacking the archive is slow, so skip it when the target directory already
# holds files. This keeps "Restart Kernel -> Run All" cheap; delete the
# directory to force a fresh extraction.
if os.path.isdir(extraction_path) and os.listdir(extraction_path):
    print(f'Archive already extracted: {extraction_path}')
else:
    # Open the zip file and extract all of its contents to the target directory
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extraction_path)





In [None]:
# Source directories of the extracted dataset: annotations and input images.
# Neither path carries a trailing slash: the preprocessing step appends a size
# suffix to these strings and maps paths with str.replace, so a trailing slash
# would place the suffix inside the source directory and glue it onto
# sub-directory names.
dir_truth = "/home/ubuntu/Cityscapes/gtFine"
dir_input = "/home/ubuntu/Cityscapes/leftImg8bit"

In [None]:
from PIL import Image

# Target size of each sample in the dataset, passed to PIL's resize() as (width, height)
sample_size = (512, 256)

# Directories for preprocessed datasets, created next to the originals.
# Trailing path separators are stripped first; otherwise the size suffix would
# be appended *inside* the source directory (".../leftImg8bit/_512_256").
dir_truth_pp, dir_input_pp = (
    '{}_{}_{}'.format(d.rstrip('/\\'), *sample_size) for d in (dir_truth, dir_input)
)

# Run preprocessing
for dir_full, dir_pp in ((dir_truth, dir_truth_pp), (dir_input, dir_input_pp)):
    # Check if the directory already exists
    if os.path.isdir(dir_pp):
        print(f'Preprocessed directory already exists: {dir_pp}')
        continue

    # os.walk() yields nothing for a missing directory, which would otherwise
    # look like a successful (but empty) preprocessing run.
    if not os.path.isdir(dir_full):
        print(f'Source directory not found, skipping: {dir_full}')
        continue

    print(f'Preprocessing: {dir_full}')

    # Walk through the directory and preprocess each file
    for root, _dirs, files in os.walk(dir_full):
        if len(files) == 0:
            continue

        # Path of this sub-directory relative to the dataset root; used to mirror
        # the directory layout without relying on string replacement.
        rel_root = os.path.relpath(root, dir_full)
        print(f'Preprocessing sub-directory: {rel_root}')

        # Create the directory in the preprocessed set
        root_pp = os.path.normpath(os.path.join(dir_pp, rel_root))
        os.makedirs(root_pp, exist_ok=True)

        for f in files:
            if not f.endswith('.png'):
                continue

            # Resize and save the PNG image. Nearest-neighbour resampling never
            # blends pixel values, so label ids in annotation images stay valid.
            # The context manager closes the source file right away.
            with Image.open(os.path.join(root, f)) as img:
                img.resize(sample_size, Image.NEAREST).save(os.path.join(root_pp, f), 'png')

print('Preprocessing done')

In [None]:
from torchvision.datasets import Cityscapes 


# Validation split with fine annotations and semantic label maps as targets.
# The 'train' and 'test' splits can be loaded the same way by changing `split`.
val_dataset = Cityscapes(
    'D:/Academics/TUe_Modules/Avular/AED/Datasets/Cityscapes/Cityscapes',
    split='val',
    mode='fine',
    target_type='semantic',
)

# Print the dataset summary
print(val_dataset)

In [None]:
# Two-entry colormap used when plotting masks: first entry black (#000000), second entry red (#FF0000)
custom_colormap = mcolors.ListedColormap(['#000000', '#FF0000'])

def display_image_mask(image, mask, colormap):
    """Show an image and its segmentation mask side by side in one figure.

    Args:
        image: image passed unchanged to ``imshow`` for the left panel.
        mask: mask passed to ``imshow`` for the right panel.
        colormap: matplotlib colormap used for the mask; values are mapped
            over the fixed range [0, 1].
    """
    fig, (image_ax, mask_ax) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: the original image
    image_ax.imshow(image)
    image_ax.set_title('Image')

    # Right panel: the segmentation mask, colour range pinned to [0, 1]
    mask_ax.imshow(mask, cmap=colormap, vmin=0, vmax=1)
    mask_ax.set_title('Segmentation Mask')

    fig.tight_layout()
    plt.show()


def apply_colormap(mask, colormap):
    """Convert a class mask into an RGB image using a ``{class_id: color}`` mapping.

    Args:
        mask: array-like of shape (H, W) or (H, W, 1) holding one class id per
            pixel, or of shape (H, W, C) with C > 1 holding one channel per
            class (one-hot or scores). In the multi-channel case the class id
            of a pixel is the index of its largest channel.
        colormap: mapping from class id to a colour that can be assigned to an
            RGB pixel, e.g. an ``(r, g, b)`` tuple.

    Returns:
        ``uint8`` array of shape (H, W, 3). Pixels whose class id has no entry
        in ``colormap`` stay black.

    Raises:
        ValueError: if ``mask`` is neither 2-D nor 3-D.
    """
    mask = np.asarray(mask)

    # Reduce every supported layout to a 2-D map of class ids. Comparing a 3-D
    # mask directly against a class id yields a boolean array that cannot index
    # the (H, W, 3) output unless C happens to be 3.
    if mask.ndim == 2:
        labels = mask
    elif mask.ndim == 3:
        labels = mask[..., 0] if mask.shape[-1] == 1 else mask.argmax(axis=-1)
    else:
        raise ValueError(f'mask must be 2-D or 3-D, got shape {mask.shape}')

    colorized_mask = np.zeros(labels.shape + (3,), dtype=np.uint8)

    for class_id, color in colormap.items():
        colorized_mask[labels == class_id] = color

    return colorized_mask


In [None]:
class CityscapesSearchDataset(torchvision.datasets.Cityscapes):
    """Cityscapes dataset yielding ``(image, mask)`` pairs for single-class segmentation.

    The image is read with OpenCV and converted from BGR to RGB. The semantic
    label image is reduced to a float32 mask with a trailing channel axis that
    is 1.0 where the label equals ``ROAD_CLASS_ID`` and 0.0 elsewhere.

    Args:
        *args, **kwargs: forwarded unchanged to ``torchvision.datasets.Cityscapes``.
            ``target_type`` must include ``"semantic"``.
        augmentation: optional callable invoked as
            ``augmentation(image=image, mask=mask)``; must return a mapping
            with ``'image'`` and ``'mask'`` entries.
        preprocessing: optional callable with the same calling convention,
            applied after ``augmentation``.
    """

    # Label value that is turned into the positive class of the binary mask.
    # NOTE(review): 7 is assumed to be the Cityscapes label id of 'road' -- confirm against self.classes.
    ROAD_CLASS_ID = 7

    def __init__(self, *args, augmentation=None, preprocessing=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Position of the semantic target among the requested target types;
        # used to pick the right file path out of self.targets[index].
        self.semantic_target_type_index = [i for i, t in enumerate(self.target_type) if t == "semantic"][0]
        self.colormap = self._generate_colormap()
        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def _generate_colormap(self):
        """Return a dict mapping each class's ``train_id`` to its ``id``.

        Classes whose ``train_id`` is -1 or 255 are skipped. When several
        classes share a ``train_id``, the last one in ``self.classes`` wins.
        """
        colormap = {}
        for class_ in self.classes:
            if class_.train_id in (-1, 255):
                continue
            colormap[class_.train_id] = class_.id
        return colormap

    def _convert_to_segmentation_mask(self, mask):
        """Turn a label image into a float32 binary mask with a trailing channel axis.

        Args:
            mask: array of label values.

        Returns:
            float32 array with one extra trailing axis of length 1, holding 1.0
            where ``mask == ROAD_CLASS_ID`` and 0.0 elsewhere.
        """
        road_mask = (mask == self.ROAD_CLASS_ID).astype(np.float32)
        return np.expand_dims(road_mask, axis=-1)

    def __getitem__(self, index):
        """Load, convert and optionally transform the sample at ``index``.

        Returns:
            Tuple ``(image, mask)`` after the optional augmentation and
            preprocessing callables have been applied.

        Raises:
            FileNotFoundError: if the image or the label file cannot be read.
        """
        image_path = self.images[index]
        mask_path = self.targets[index][self.semantic_target_type_index]

        # cv2.imread signals failure by returning None instead of raising, so
        # check explicitly to get an error that names the offending file.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # IMREAD_UNCHANGED keeps the stored label values as they are in the file
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
        if mask is None:
            raise FileNotFoundError(f'Could not read label image: {mask_path}')
        mask = self._convert_to_segmentation_mask(mask)

        # apply augmentation
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

In [None]:
# Training split without augmentation or preprocessing, for visual inspection only
raw_dataset_kwargs = dict(
    root='D:/Academics/TUe_Modules/Avular/AED/Datasets/Cityscapes/Cityscapes',  # Replace with the actual path to your dataset
    split='train',
    mode='fine',
    target_type='semantic',
)
custom_dataset = CityscapesSearchDataset(**raw_dataset_kwargs)

# Plot the first samples next to their masks; raise the count to inspect more
num_preview_samples = 5
for i in range(num_preview_samples):
    image, mask = custom_dataset[i]
    display_image_mask(image, mask, custom_colormap)


In [None]:
import albumentations as albu


def get_training_augmentation():
    """Build the albumentations pipeline applied to training samples.

    The pipeline flips, shifts/scales, pads and crops each sample to 320x320,
    then applies noise, perspective, and one randomly chosen transform from
    each of three groups (brightness/contrast-type, sharpen/blur, colour).

    Returns:
        albumentations.Compose: callable invoked as ``aug(image=..., mask=...)``.
    """
    train_transform = [
        albu.HorizontalFlip(p=0.5),

        # border_mode=0 is cv2.BORDER_CONSTANT: areas exposed by the shift are filled with a constant
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),

        # p=1 replaces the deprecated `always_apply=True`; both make the
        # transform run on every sample, so the output is always 320x320.
        albu.PadIfNeeded(min_height=320, min_width=320, p=1, border_mode=0),
        albu.RandomCrop(height=320, width=320, p=1),

        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.5),

        albu.OneOf(
            [
                albu.CLAHE(p=1),
                # Brightness-only jitter. RandomBrightness is deprecated in favour of
                # RandomBrightnessContrast; contrast_limit=0 keeps contrast untouched.
                albu.RandomBrightnessContrast(contrast_limit=0, p=1),
                albu.RandomGamma(p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                albu.Sharpen(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                albu.RandomBrightnessContrast(p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(train_transform)

    



def get_validation_augmentation():
    """Pad samples so that both height and width are multiples of 32.

    A fixed minimum size does not guarantee divisibility for arbitrary inputs,
    so the padding is expressed through the divisor arguments instead. Samples
    whose sides are already multiples of 32 pass through unchanged.

    Returns:
        albumentations.Compose: callable invoked as ``aug(image=..., mask=...)``.
    """
    test_transform = [
        # min_height/min_width must be None when the divisor arguments are used
        albu.PadIfNeeded(
            min_height=None,
            min_width=None,
            pad_height_divisor=32,
            pad_width_divisor=32,
        ),
    ]
    return albu.Compose(test_transform)


def to_tensor(x, **kwargs):
    """Reorder a 3-D array from (H, W, C) to (C, H, W) and cast it to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = np.transpose(x, (2, 0, 1))
    return channels_first.astype(np.float32)


def get_preprocessing(preprocessing_fn):
    """Build the preprocessing pipeline applied after augmentation.

    Args:
        preprocessing_fn (callable): data normalization function, applied to
            the image only (can be specific to each pretrained network).

    Returns:
        albumentations.Compose: first runs ``preprocessing_fn`` on the image,
        then ``to_tensor`` on both image and mask.
    """
    normalize_image = albu.Lambda(image=preprocessing_fn)
    convert_layout = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize_image, convert_layout])


In [None]:
# Training split with the training augmentation enabled, for visual inspection only
augmented_dataset_kwargs = dict(
    root='D:/Academics/TUe_Modules/Avular/AED/Datasets/Cityscapes/Cityscapes',  # Replace with the actual path to your dataset
    split='train',
    mode='fine',
    target_type='semantic',
    augmentation=get_training_augmentation(),
)
custom_dataset = CityscapesSearchDataset(**augmented_dataset_kwargs)

# Plot the first augmented samples next to their masks; raise the count to inspect more
num_preview_samples = 5
for i in range(num_preview_samples):
    image, mask = custom_dataset[i]
    display_image_mask(image, mask, custom_colormap)

Model training

In [None]:
import torch
import numpy as np
import segmentation_models_pytorch as smp

In [None]:
# Model configuration
ENCODER = 'resnet50'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['road']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multiclass segmentation
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Settings for an optional auxiliary classification head. Not passed to the
# model below; kept so the head can be enabled via `aux_params=aux_params`.
aux_params=dict(
    pooling='avg',             # one of 'avg', 'max'
    dropout=0.5,               # dropout ratio, default is None
    activation=ACTIVATION,      # activation function, default is None
    classes=len(CLASSES),      # define number of output labels
)

# create segmentation model with pretrained encoder
model = smp.Unet(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    classes=len(CLASSES),
    activation=ACTIVATION,
)

# Input normalization function matching the chosen encoder and its pretrained weights
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

In [None]:
from torch.utils.data import DataLoader


# Arguments shared by the training and validation datasets
common_dataset_kwargs = dict(
    root='D:/Academics/TUe_Modules/Avular/AED/Datasets/Cityscapes/Cityscapes',  # Replace with the actual path to your dataset
    mode='fine',
    target_type='semantic',
)

# Training split: random augmentation followed by encoder-specific preprocessing
train_dataset = CityscapesSearchDataset(
    split='train',
    augmentation=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
    **common_dataset_kwargs,
)

# Validation split: validation augmentation followed by the same preprocessing
valid_dataset = CityscapesSearchDataset(
    split='val',
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
    **common_dataset_kwargs,
)

# Only the training samples are shuffled
BATCH_SIZE = 4
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
from segmentation_models_pytorch import utils


# Training objective
loss = smp.utils.losses.DiceLoss()

# Alternative losses that were tried (each given an explicit __name__):
#   loss = smp.losses.SoftBCEWithLogitsLoss();                          loss.__name__ = 'soft_bce'
#   loss = smp.losses.TverskyLoss(mode = 'binary', from_logits= True);  loss.__name__ = 'Twersky_Loss'
#   loss = smp.losses.LovaszLoss(mode = 'binary', from_logits=True);    loss.__name__ = 'Lovasz_Loss'

# Metrics reported for every epoch
metrics = [smp.utils.metrics.IoU(threshold=0.5)]

# A single parameter group covering all model parameters
optimizer = torch.optim.Adam([
    {'params': model.parameters(), 'lr': 0.0001},
])


In [None]:
# Epoch runners: both share the loss, metrics, device and verbosity settings;
# only the training runner receives the optimizer.
epoch_runner_kwargs = dict(
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)

train_epoch = smp.utils.train.TrainEpoch(model, optimizer=optimizer, **epoch_runner_kwargs)

valid_epoch = smp.utils.train.ValidEpoch(model, **epoch_runner_kwargs)

In [None]:
# Best validation IoU seen so far; a checkpoint is written whenever it improves
max_score = 0

for i in range(50):

    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)
    valid_logs = valid_epoch.run(valid_loader)
    print(train_logs)

    # Save the whole model object when the validation IoU beats the previous best
    valid_iou = valid_logs['iou_score']
    if valid_iou > max_score:
        max_score = valid_iou
        torch.save(model, './best_model.pth')
        print('Model saved!')

    # Lower the learning rate of the first parameter group once epoch 40 has finished
    if i == 40:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')