In [None]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split
import cv2
import torch
import torch.nn as nn
from torchvision.transforms import v2
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, Subset, DataLoader
from torchvision import transforms
from ultralytics import YOLO
from torch.cuda.amp import autocast, GradScaler
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Preprocessing

In [None]:
# Every dataset location is resolved relative to the directory the notebook runs from.
current_dir = os.getcwd()
print(f"Current working directory: {current_dir}")

input_dir = os.path.join(current_dir, "input_data")
base_path = os.path.join(input_dir, "dataset", "semantic_drone_dataset")

# Image and label folders sit under the dataset root; the colour masks and the
# class table sit directly under the input_data folder.
IMAGE_PATH = os.path.join(base_path, "original_images")
TARGET_PATH = os.path.join(base_path, "label_images_semantic")
COLOR_TARGET_PATH = os.path.join(input_dir, "RGB_color_image_masks")
CSV_PATH = os.path.join(input_dir, "class_dict_seg.csv")

# Class table loaded from CSV_PATH.
class_dict = pd.read_csv(CSV_PATH)

In [None]:
# Class count taken as the class table's row count minus one.
# NOTE(review): presumably one row of the table is not a trainable class - confirm which.
num_classes = len(class_dict) - 1

# File stems (text before the first '.') of every image and every mask, sorted
# so that position i in `name` and in `masks` can be compared by eye.
name = sorted(
    filename.split('.')[0]
    for _, _, filenames in os.walk(IMAGE_PATH)
    for filename in filenames
)
masks = sorted(
    filename.split('.')[0]
    for _, _, filenames in os.walk(TARGET_PATH)
    for filename in filenames
)

# The image count belongs to `name` and the mask count to `masks`
# (the two arguments were previously swapped).
print('IMAGE {}, MASK {}'.format(len(name), len(masks)))

In [None]:
def visualize_image_and_mask(image_path, mask_path, alpha=0.5):
    """
    Display an image, its mask, and the mask overlaid on the image in one row.

    Args:
    image_path (str): Path to the original image file.
    mask_path (str): Path to the mask image file.
    alpha (float): Transparency of the mask overlay. Range 0-1.

    Raises:
    FileNotFoundError: If the image cannot be read by cv2.
    """
    # cv2.imread signals failure by returning None instead of raising, so check
    # explicitly to get a readable error rather than a cvtColor assertion.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image file: {image_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB

    # Read the mask using PIL
    mask = np.array(Image.open(mask_path))

    # Three panels side by side: image, mask, overlay
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

    # Display the original image
    ax1.imshow(img)
    ax1.set_title('Original Image')
    ax1.axis('off')

    # Display the mask
    ax2.imshow(mask)
    ax2.set_title('Mask')
    ax2.axis('off')

    # Display the original image with the mask drawn semi-transparently on top
    ax3.imshow(img)
    ax3.imshow(mask, alpha=alpha)
    ax3.set_title(f'Image with Mask Overlay (alpha={alpha})')
    ax3.axis('off')

    plt.tight_layout()
    plt.show()

# Preview the first image name together with the first mask name.
visualize_image_and_mask(
    f"{IMAGE_PATH}/{name[0]}.jpg",
    f"{TARGET_PATH}/{masks[0]}.png",
)

# DataLoaders

In [None]:
# Class-count constant for this section.
n_classes = 23


def create_df(image_path=None):
    """Build a one-column DataFrame of image ids found under a directory tree.

    An id is the file name up to its first '.'. Ids are returned sorted:
    os.walk yields files in an arbitrary, filesystem-dependent order, and a
    stable order is required for the seeded train/val/test split to select
    the same files on every machine.

    Args:
        image_path (str, optional): Root directory to scan. Defaults to the
            notebook-level IMAGE_PATH.

    Returns:
        pandas.DataFrame: Column 'id' with one row per file, indexed 0..n-1.
    """
    if image_path is None:
        image_path = IMAGE_PATH

    ids = sorted(
        filename.split('.')[0]
        for _, _, filenames in os.walk(image_path)
        for filename in filenames
    )
    return pd.DataFrame({'id': ids}, index=np.arange(0, len(ids)))

# Collect the image ids, then carve out the splits:
# 10% of all ids for test, then 20% of the remainder for validation.
df = create_df()
print('Total Images: ', len(df))

X_trainval, X_test = train_test_split(
    df['id'].values,
    test_size=0.1,
    random_state=42,
)
X_train, X_val = train_test_split(
    X_trainval,
    test_size=0.2,
    random_state=42,
)

# Per-channel normalisation statistics handed to the datasets
# (these are the values commonly quoted for ImageNet).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


class AerialDataset(Dataset):
    """Dataset that pairs ``<id>.jpg`` images with ``<id>.png`` masks.

    Args:
        img_path (str): Directory holding the ``.jpg`` images.
        mask_path (str): Directory holding the ``.png`` masks.
        X (sequence of str): File ids (names without extension).
        mean, std (sequence of float, optional): Normalisation statistics for
            the default transform; when either is missing/empty,
            ``A.Normalize()`` is used with its own defaults.
        transform (callable, optional): Called as
            ``transform(image=..., mask=...)`` and expected to return a mapping
            with 'image' and 'mask'. When None, a default pipeline is built.
        is_train (bool): Selects the augmenting default pipeline when True and
            the resize+normalise-only pipeline when False. Note the default is
            True, so evaluation datasets must pass ``is_train=False``.
    """

    def __init__(self, img_path, mask_path, X, mean=None, std=None, transform=None, is_train=True):
        self.img_path = img_path
        self.X = X
        self.mean = mean
        self.std = std
        self.mask_path = mask_path
        self.transform = transform
        self.is_train = is_train

        # Fall back to the built-in pipeline (needs self.mean / self.std, set above).
        if self.transform is None:
            self.transform = self.get_default_transform(is_train)

    def __len__(self):
        """Number of ids in this split."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for the id at position ``idx``.

        Raises:
            FileNotFoundError: If the image or the mask cannot be read.
        """
        image_file = os.path.join(self.img_path, self.X[idx] + '.jpg')
        mask_file = os.path.join(self.mask_path, self.X[idx] + '.png')

        # cv2.imread returns None on failure instead of raising; fail loudly
        # with the offending path rather than deep inside cvtColor/transform.
        img = cv2.imread(image_file)
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {image_file}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Masks are read as single-channel images.
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_file}")

        # Apply the same spatial transformations to image and mask
        transformed = self.transform(image=img, mask=mask)
        img = transformed['image']
        mask = transformed['mask']

        # The mask is always returned as a long tensor, whether or not the
        # transform already produced a tensor.
        if not isinstance(mask, torch.Tensor):
            mask = torch.from_numpy(mask).long()
        else:
            mask = mask.long()

        return img, mask

    def get_default_transform(self, is_train):
        """Build the default albumentations pipeline.

        Both variants resize to 1000x1500, normalise and convert to tensors;
        the training variant inserts the random augmentations in between.
        """
        resize = A.Resize(1000, 1500, interpolation=cv2.INTER_NEAREST)
        normalize = A.Normalize(mean=self.mean, std=self.std) if self.mean and self.std else A.Normalize()

        if not is_train:
            return A.Compose([
                resize,
                normalize,
                ToTensorV2(),
            ])

        return A.Compose([
            resize,
            A.HorizontalFlip(),
            A.VerticalFlip(),
            A.GridDistortion(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=45, p=0.5),
            A.OpticalDistortion(distort_limit=0.1, shift_limit=0.1, p=0.5),
            A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.5),
            normalize,
            ToTensorV2(),
        ])


# Datasets. Only the training split may use random augmentation; validation and
# test must pass is_train=False explicitly because AerialDataset defaults to
# is_train=True (they were previously being augmented like training data).
train_set = AerialDataset(IMAGE_PATH, TARGET_PATH, X_train, mean, std, is_train=True)
val_set = AerialDataset(IMAGE_PATH, TARGET_PATH, X_val, mean, std, is_train=False)
test_set = AerialDataset(IMAGE_PATH, TARGET_PATH, X_test, mean, std, is_train=False)

# Dataloaders. Evaluation loaders are not shuffled so the per-batch averaged
# metrics are computed over the same batches every epoch.
batch_size = 5
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=4, shuffle=False)

print("Number of batches in the train data loader:", len(train_loader))
print("Number of batches in the valid data loader:", len(val_loader))
print("Number of batches in the test data loader:", len(test_loader))



# Custom Encoder-Decoder Architecture

In [None]:
def initialize_weights(module):
    """Re-initialise a single layer in place.

    * ``nn.BatchNorm2d``: weight set to 1, bias set to 0.
    * ``nn.Conv2d``: Kaiming-normal weights (fan_out, relu); bias, if any, set to 0.
    * Any other module is left untouched.
    """
    if isinstance(module, nn.BatchNorm2d):
        nn.init.constant_(module.weight, 1)
        nn.init.constant_(module.bias, 0)
        return

    if not isinstance(module, nn.Conv2d):
        return

    nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
    if module.bias is not None:
        nn.init.constant_(module.bias, 0)

class OutConv(torch.nn.Module):
    """Pointwise (kernel_size=1) convolution from in_channels to out_channels."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        projected = self.conv(x)
        return projected
    
class DoubleConv(torch.nn.Module):
    """Two stacked (3x3 convolution => BatchNorm => ReLU) stages.

    The first stage maps in_channels to out_channels // 2, the second maps
    that intermediate width to out_channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        mid_channels = out_channels // 2  # width between the two stages

        # Layers are created in the same order they run in.
        stages = []
        for stage_in, stage_out in ((in_channels, mid_channels), (mid_channels, out_channels)):
            stages.append(torch.nn.Conv2d(stage_in, stage_out, kernel_size=3, padding=1))
            stages.append(torch.nn.BatchNorm2d(stage_out))
            stages.append(torch.nn.ReLU(inplace=True))
        self.double_conv = torch.nn.Sequential(*stages)

    def forward(self, x):
        return self.double_conv(x)
    
class Up(torch.nn.Module):
    """Decoder stage: 2x bilinear upsample, optional skip concat, then
    DoubleConv plus a parallel 1x1 convolution added to its output.

    Both convolution paths are built for ``in_channels + skip_channels`` input
    channels, so calling forward without a skip only works when
    ``skip_channels`` is 0.
    """

    def __init__(self, in_channels, skip_channels, out_channels):
        super().__init__()
        merged_channels = in_channels + skip_channels
        self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        self.conv = DoubleConv(merged_channels, out_channels)
        self.residual = nn.Conv2d(merged_channels, out_channels, kernel_size=1)

    def forward(self, x, skip=None):
        merged = self.up(x)
        if skip is not None:
            # Match the skip tensor's spatial size exactly before concatenating on channels.
            merged = torch.nn.functional.interpolate(
                merged, size=skip.shape[2:], mode='bilinear', align_corners=True
            )
            merged = torch.cat([merged, skip], dim=1)
        main_branch = self.conv(merged)
        shortcut = self.residual(merged)
        return main_branch + shortcut

In [None]:
class YoloEncoder(torch.nn.Module):
    """Runs the layers of a wrapped model one by one and records every output.

    The wrapped object must expose an iterable ``.model`` attribute whose
    entries are the layers (presumably an Ultralytics YOLOv5 detection model -
    confirm against the checkpoint that is loaded).
    """
    def __init__(self, yolov5_model):
        super(YoloEncoder, self).__init__()
        self.yolov5_model = yolov5_model
        # Weight re-initialisation is deliberately disabled so the wrapped
        # model keeps the weights it was constructed/loaded with (transfer learning).
        #self.apply(initialize_weights) #Comment when using with transfer learning
    
    def forward(self, x):
        """Return ``(x, features)``.

        ``x`` is the output of the last layer executed and ``features`` is the
        list of outputs of every executed nn.Module layer, in order. Iteration
        stops after the layer at index 23, so later layers are never run.
        """
        features = []
        for i, layer in enumerate(self.yolov5_model.model):
            if isinstance(layer, nn.Module):
                if isinstance(layer, (nn.Sequential, nn.ModuleList)):
                    # For sequential or module list, process each sub-layer
                    for sub_layer in layer:
                        x = sub_layer(x)
                elif hasattr(layer, 'forward'):
                    # Every nn.Module defines forward, so this branch handles
                    # all remaining layers.
                    # Layers whose class name contains 'Concat' take a list input.
                    # NOTE(review): features[-1] was appended at the end of the
                    # previous iteration, so it is the same tensor as x here -
                    # the layer concatenates x with itself rather than with an
                    # earlier skip output. The decoder channel counts appear to
                    # rely on this; verify before changing the routing.
                    if 'Concat' in layer.__class__.__name__:
                        x = layer([x, features[-1]])  # Assuming it concatenates with the previous feature
                    else:
                        x = layer(x)
                features.append(x)
            if i == 23:  # Adjust this if you need features from different layers
                break
        return x, features

class UNetWithYoloEncoder(torch.nn.Module):
    """U-Net style decoder stacked on top of a feature-returning encoder.

    Args:
        yolov5_encoder: Module whose forward returns ``(x, features)`` where
            ``features`` can be indexed up to 23.
        n_classes (int): Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, yolov5_encoder, n_classes):
        super(UNetWithYoloEncoder, self).__init__()
        self.encoder = yolov5_encoder
        self.up1 = Up(512, 512, 256)  # input = layer 23 output
        self.up2 = Up(256, 256, 128)  # layer 20
        self.up3 = Up(128, 128, 64)   # layer 17
        self.up4 = Up(64, 256, 128)   # layer 13
        self.up5 = Up(128, 512, 256)  # layer 9
        self.up6 = Up(256, 256, 128)  # layer 6
        self.up7 = Up(128, 128, 64)   # layer 4
        self.up8 = Up(64, 64, 32)     # layer 2
        self.out_conv = torch.nn.Conv2d(32, n_classes, kernel_size=1)  # Update input channels

        # Initialise decoder weights only; the encoder keeps its own weights.
        self.apply(self._init_decoder_weights)
        # out_conv is a plain Conv2d (not an Up/OutConv), so the hook above
        # never matches it; initialise it directly.
        initialize_weights(self.out_conv)

    def _init_decoder_weights(self, m):
        """Apply initialize_weights to every layer nested inside a decoder block.

        initialize_weights only acts on Conv2d/BatchNorm2d instances, so it
        must be applied recursively to the block's children; calling it on the
        Up/OutConv container itself (as before) initialised nothing.
        """
        if isinstance(m, (Up, OutConv)):
            m.apply(initialize_weights)

    def forward(self, x):
        """Return per-class logits resized to the spatial size of ``x``."""
        input_size = x.shape[2:]
        x, features = self.encoder(x)
        # Decode using the encoder outputs listed next to each Up stage above
        x = self.up1(x, features[23])
        x = self.up2(x, features[20])
        x = self.up3(x, features[17])
        x = self.up4(x, features[13])
        x = self.up5(x, features[9])
        x = self.up6(x, features[6])
        x = self.up7(x, features[4])
        x = self.up8(x, features[2])

        x = self.out_conv(x)
        # Resize the logits back to the input resolution
        x = F.interpolate(x, size=input_size, mode='bilinear', align_corners=True)
        return x

def pixel_accuracy(output, mask):
    """Fraction of positions where the arg-max over dim 1 of ``output`` equals ``mask``.

    Computed without tracking gradients; returns a Python float.
    """
    with torch.no_grad():
        predicted = F.softmax(output, dim=1).argmax(dim=1)
        hits = (predicted == mask).int()
        return float(hits.sum()) / float(hits.numel())

def mIoU(pred_mask, mask, smooth=1e-10, n_classes=23):
    """Mean intersection-over-union across the classes present in ``mask``.

    ``pred_mask`` is reduced to class ids with an arg-max over dim 1. Classes
    with no pixel in ``mask`` contribute NaN and are ignored by the final
    ``np.nanmean``. ``smooth`` is added to both intersection and union.
    """
    with torch.no_grad():
        flat_pred = torch.argmax(F.softmax(pred_mask, dim=1), dim=1).contiguous().view(-1)
        flat_true = mask.contiguous().view(-1)

        per_class = []
        for class_id in range(n_classes):
            predicted_here = flat_pred == class_id
            labelled_here = flat_true == class_id

            # Class absent from the ground truth: excluded from the mean.
            if labelled_here.long().sum().item() == 0:
                per_class.append(np.nan)
                continue

            overlap = (predicted_here & labelled_here).sum().float().item()
            combined = (predicted_here | labelled_here).sum().float().item()
            per_class.append((overlap + smooth) / (combined + smooth))

        return np.nanmean(per_class)

class DiceLoss(nn.Module):
    """Dice loss ``1 - (2*sum(p*t) + smooth) / (sum(p) + sum(t) + smooth)``
    computed over all elements of two equally shaped tensors."""

    def __init__(self, smooth=1e-5):
        super().__init__()
        self.smooth = smooth

    def forward(self, predictions, targets):
        # Shapes must agree element for element
        assert predictions.shape == targets.shape, f"Predictions shape {predictions.shape} doesn't match targets shape {targets.shape}"

        flat_pred = predictions.reshape(-1)
        flat_true = targets.reshape(-1)

        overlap = torch.sum(flat_pred * flat_true)
        numerator = 2. * overlap + self.smooth
        denominator = flat_pred.sum() + flat_true.sum() + self.smooth
        return 1 - numerator / denominator

class CombinedLoss(nn.Module):
    """Weighted sum of class-averaged Dice loss and cross-entropy.

    ``weight`` scales the Dice term and ``1 - weight`` scales the
    cross-entropy term. ``num_classes`` sets the one-hot width and the number
    of channels over which Dice is averaged.
    """

    def __init__(self, weight=0.5, num_classes=23):
        super().__init__()
        self.weight = weight
        self.dice_loss = DiceLoss()
        self.ce_loss = nn.CrossEntropyLoss()
        self.num_classes = num_classes

    def forward(self, predictions, targets):
        # Class-id targets (dim 1 is squeezed away when it has size 1)
        class_ids = targets.squeeze(1).long()

        # Per-class probabilities and the matching one-hot target, both channel-first
        probabilities = F.softmax(predictions, dim=1)
        one_hot = F.one_hot(class_ids, num_classes=self.num_classes).permute(0, 3, 1, 2).float()

        # Dice is evaluated channel by channel and then averaged
        dice_total = sum(
            self.dice_loss(probabilities[:, channel], one_hot[:, channel])
            for channel in range(self.num_classes)
        )
        mean_dice = dice_total / self.num_classes

        cross_entropy = self.ce_loss(predictions, class_ids)
        return (self.weight * mean_dice) + ((1 - self.weight) * cross_entropy)

# Training

In [None]:
# Pick the best available backend: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
# Reach the MPS backend through torch.backends directly
# (the check was previously spelled torch.torch.backends).
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Device: {device} ")

In [None]:
class EarlyStopping:
    """Stop training when the validation loss stops improving.

    Call the instance once per epoch with the validation loss and the model.
    The model's state_dict is saved to ``path`` on the first call and whenever
    the loss improves by at least ``delta``; otherwise a counter is
    incremented and ``early_stop`` becomes True once it reaches ``patience``.

    Args:
        patience (int): Number of non-improving calls tolerated.
        verbose (bool): Print a message each time a checkpoint is saved.
        delta (float): Minimum decrease in loss that counts as an improvement.
        path (str): Checkpoint file written by ``torch.save``.
    """

    def __init__(self, patience=7, verbose=False, delta=0.0001, path='YoloUnetV4.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        # Scores are negated losses so that "higher is better".
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # Not better than the best score by at least delta.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Write ``model.state_dict()`` to ``self.path`` and record ``val_loss`` as the new minimum."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model to {self.path} ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [None]:
# Helpers for visualising images, target masks, and predicted masks

# Class names in class-table row order.
class_names = class_dict['name'].to_list()

def create_color_map(class_dict):
    """Map each class name to its ``[r, g, b]`` colour scaled to the 0-1 range.

    Reads the 'name', ' r', ' g' and ' b' columns (the channel column names
    carry a leading space) and divides each channel value by 255.
    """
    channel_columns = (' r', ' g', ' b')
    return {
        row['name']: [row[column] / 255.0 for column in channel_columns]
        for _, row in class_dict.iterrows()
    }

# Name -> normalised [r, g, b] lookup built from the class table.
color_map = create_color_map(class_dict)

def apply_color_map(mask, color_map, class_dict):
    """Turn a class-id mask into an RGB array of shape ``mask.shape + (3,)``.

    For every name in ``color_map`` the class id is the index label of the
    first ``class_dict`` row with that name; pixels equal to that id receive
    the mapped colour. Pixels matching no class stay zero.
    """
    rgb_mask = np.zeros(tuple(mask.shape) + (3,))
    names = class_dict['name']
    for class_name, color in color_map.items():
        matches = (names == class_name).to_numpy()
        class_index = class_dict.index[matches][0]
        rgb_mask[mask == class_index] = color
    return rgb_mask


def plot_results(images, true_masks, pred_masks, n_classes, color_map, class_dict, batch_size=12):
    """Plot up to three samples as rows of (input image, true mask, predicted mask).

    Args:
        images: Batch of channel-first image tensors.
        true_masks: Batch of class-id masks (tensors).
        pred_masks: Batch of predicted class-id masks (tensors).
        n_classes: Unused; kept for call compatibility.
        color_map: Class name -> [r, g, b] mapping passed to apply_color_map.
        class_dict: Class table passed to apply_color_map.
        batch_size: Unused; kept for call compatibility.
    """
    # Never index past the end of a short (e.g. final) batch.
    n_rows = min(3, len(images))
    if n_rows == 0:
        return

    # squeeze=False keeps `axes` two-dimensional even when only one row is drawn.
    fig, axes = plt.subplots(n_rows, 3, figsize=(15, 5 * n_rows), squeeze=False)

    # Loop through the first images in the batch
    for i in range(n_rows):
        # Display input image, rescaled to [0, 1] for imshow
        img = images[i].permute(1, 2, 0).cpu().numpy()
        value_range = img.max() - img.min()
        if value_range > 0:
            img = (img - img.min()) / value_range
        else:
            # Constant image: avoid dividing by zero.
            img = np.zeros_like(img)
        axes[i, 0].imshow(img)
        axes[i, 0].set_title('Input Image')

        # Display true mask
        true_mask = true_masks[i].cpu().numpy()
        true_mask_rgb = apply_color_map(true_mask, color_map, class_dict)
        axes[i, 1].imshow(true_mask_rgb)
        axes[i, 1].set_title('True Mask')

        # Display predicted mask
        pred_mask = pred_masks[i].cpu().numpy()
        pred_mask_rgb = apply_color_map(pred_mask, color_map, class_dict)
        axes[i, 2].imshow(pred_mask_rgb)
        axes[i, 2].set_title('Predicted Mask')

    plt.tight_layout()
    plt.show()

def visualize_color_map(color_map, class_dict):
    """Draw one coloured swatch per class with its name beside it.

    ``class_dict`` is accepted but not used.
    """
    num_classes = len(color_map)
    fig, ax = plt.subplots(figsize=(12, num_classes * 0.5))

    row = 0
    for class_name, color in color_map.items():
        # Unit square at height `row`, label centred to its right
        ax.add_patch(plt.Rectangle((0, row), 1, 1, facecolor=color))
        ax.text(1.1, row + 0.5, class_name, va='center')
        row += 1

    ax.set_xlim(0, 2)
    ax.set_ylim(0, num_classes)
    ax.axis('off')
    plt.title('Color Map Verification')
    plt.tight_layout()
    plt.show()

# Create the color map
color_map = create_color_map(class_dict)

# Visualize the color map
visualize_color_map(color_map, class_dict)

# Print out the RGB values for each class
print("Class Name: (R, G, B)")
for class_name, color in color_map.items():
    # Convert back to the 0-255 range for readability. Round instead of
    # truncating: the divide-by-255 / multiply-by-255 round trip can land just
    # below the original integer, which int() alone would report as one less.
    r, g, b = [int(round(c * 255)) for c in color]
    print(f"{class_name}: ({r}, {g}, {b})")

In [None]:
# Initialize models and criteria
num_classes = len(class_dict) - 1

# YOLO encoder.
# The checkpoint stores a pickled model object under the 'model' key, so it
# must be loaded with weights_only=False (newer PyTorch defaults to True and
# refuses to unpickle arbitrary classes).
# NOTE: unpickling executes code from the file - only load checkpoints from a
# trusted source.
yolov5_model = torch.load(
    os.path.join(os.getcwd(), 'yolov5su.pt'),
    map_location=device,
    weights_only=False,
)['model']
yolov5_model = yolov5_model.to(torch.float32)  # Convert to float32
yolo_encoder = YoloEncoder(yolov5_model).to(device)

# UNet decoder on top of the encoder
model = UNetWithYoloEncoder(yolo_encoder, n_classes=num_classes)
model.to(device)
# To freeze the encoder instead of fine-tuning it:
# for param in model.encoder.parameters():
#     param.requires_grad = False

# Criteria
# A higher weight favours the Dice component, a lower one favours cross-entropy.
# The loss is given the same class count as the model head so the one-hot
# targets always match the prediction channels.
loss_fn = CombinedLoss(0.75, num_classes=num_classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.1)
early_stopping = EarlyStopping(patience=8, verbose=True, path='YoloUnetV7.pt')
scaler = GradScaler()
num_epochs = 75  # Set the number of epochs

# Per-epoch histories used by the plotting cell
train_losses = []
val_losses = []
val_iou = []; val_acc = []
train_iou = []; train_acc = []

torch.cuda.empty_cache()
for epoch in range(num_epochs):
    epoch_start_time = time.time()

    # ---- Training ----
    model.train()
    train_loss = 0.0
    iou_score = 0
    train_accuracy = 0

    # The batch of label tensors is called `targets` so it does not overwrite
    # the notebook-level `masks` list of mask file names.
    for inputs, targets in train_loader:
        images, targets = inputs.to(device), targets.to(device)  # Send inputs to GPU if available
        optimizer.zero_grad(set_to_none=True)

        # Mixed-precision forward pass and loss
        with autocast():
            predictions = model(images)
            loss = loss_fn(predictions, targets)

        # Scale the loss for backward, then let the scaler run (or skip) the
        # optimizer step and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        # Both metric helpers disable gradient tracking internally.
        iou_score += mIoU(predictions, targets, n_classes=num_classes)
        train_accuracy += pixel_accuracy(predictions, targets)

    epoch_loss = train_loss / len(train_loader)

    # ---- Validation ----
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    val_accuracy = 0
    val_iou_score = 0
    with torch.no_grad():  # No gradient computation in validation phase
        for val_inputs, val_masks in val_loader:
            val_inputs, val_masks = val_inputs.to(device), val_masks.to(device)

            with autocast():
                val_predictions = model(val_inputs)
                loss = loss_fn(val_predictions, val_masks)

            val_iou_score += mIoU(val_predictions, val_masks, n_classes=num_classes)
            val_accuracy += pixel_accuracy(val_predictions, val_masks)
            val_loss += loss.item()

    # ---- Per-epoch bookkeeping (metrics are averaged over batches) ----
    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)
    val_iou.append(val_iou_score / len(val_loader))
    train_iou.append(iou_score / len(train_loader))
    train_acc.append(train_accuracy / len(train_loader))
    val_acc.append(val_accuracy / len(val_loader))
    train_losses.append(epoch_loss)

    epoch_time = time.time() - epoch_start_time
    epoch_minutes = int(epoch_time // 60)
    epoch_seconds = int(epoch_time % 60)

    print("\nEpoch:{}/{} |".format(epoch+1, num_epochs),
            "Train Loss: {:.3f} |".format(epoch_loss),
            "Val Loss: {:.3f} |".format(avg_val_loss),
            "Train mIoU:{:.3f} |".format(iou_score/len(train_loader)),
            "Val mIoU: {:.3f} |".format(val_iou_score/len(val_loader)),
            "Train Acc:{:.3f} |".format(train_accuracy/len(train_loader)),
            "Val Acc:{:.3f} |".format(val_accuracy/len(val_loader)),
            "Time: {:02d}:{:02d} mins".format(epoch_minutes,epoch_seconds))

    # Gradient norm of whatever gradients are left from the last training batch
    total_norm = 0
    for p in model.parameters():
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm.item() ** 2
    total_norm = total_norm ** 0.5
    print(f"Gradient norm: {total_norm}")

    early_stopping(avg_val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

    scheduler.step(avg_val_loss)
    # Read the learning rate from the optimizer: this works for every PyTorch
    # release, whereas ReduceLROnPlateau.get_last_lr() is missing in older ones.
    current_lrs = [group['lr'] for group in optimizer.param_groups]
    print(f'Learning Rate: {current_lrs}')

# Plot Training Results

In [None]:
# One panel per metric: (train series, train label, val series, val label, title, y-label)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
panels = (
    (train_losses, 'Training Loss', val_losses, 'Validation Loss', 'Loss over Epochs', 'Loss'),
    (train_acc, 'Training Accuracy', val_acc, 'Validation Accuracy', 'Accuracy over Epochs', 'Accuracy'),
    (train_iou, 'Training mIoU', val_iou, 'Validation mIoU', 'mIoU over Epochs', 'mIoU'),
)

for ax, (train_series, train_label, val_series, val_label, title, y_label) in zip(axes, panels):
    ax.plot(train_series, label=train_label)
    ax.plot(val_series, label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

plt.tight_layout()
plt.savefig('combined_metrics_plot.png')
plt.show()

# Testing

In [None]:
# Restore the checkpoint written by early stopping. map_location lets a
# checkpoint saved on one device type load on whichever device is active now.
model.load_state_dict(torch.load('YoloUnetV7.pt', map_location=device))
color_map = create_color_map(class_dict)
model.eval()
test_loss = 0
test_accuracy = 0
test_iou_score = 0

with torch.no_grad():
    # The label batch is called `targets` so it does not overwrite the
    # notebook-level `masks` list of mask file names.
    for inputs, targets in test_loader:
        images, targets = inputs.to(device), targets.to(device)
        outputs = model(images)
        loss = loss_fn(outputs, targets)

        # Visualize results for this batch
        inputs_cpu = images.cpu().detach()
        targets_cpu = targets.cpu().detach()
        predictions_cpu = outputs.cpu().detach()
        pred_masks = torch.argmax(predictions_cpu, dim=1)
        plot_results(inputs_cpu, targets_cpu, pred_masks, num_classes, color_map, class_dict)

        test_loss += loss.item()
        test_iou_score += mIoU(outputs, targets, n_classes=num_classes)
        test_accuracy += pixel_accuracy(outputs, targets)

# Metrics are averaged over batches
avg_test_loss = test_loss / len(test_loader)
avg_test_iou = test_iou_score / len(test_loader)
avg_test_accuracy = test_accuracy / len(test_loader)

In [None]:
# Report the batch-averaged test metrics, one per line.
print(
    f"Test Loss: {avg_test_loss:.4f}",
    f"Test mIoU: {avg_test_iou:.4f}",
    f"Test Accuracy: {avg_test_accuracy:.4f}",
    sep="\n",
)