In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from torchvision.io import read_image
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import resize
from torchvision.models.segmentation import deeplabv3_resnet50
from PIL import Image

import warnings
warnings.filterwarnings('ignore')
import cv2
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
def create_images_list(path):
    full_path = []
    images = sorted(os.listdir(path))

    for i in images:
        full_path.append(os.path.join(path, i))

    return full_path


train_images = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/train/images')
train_masks = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/train/masks')

valid_images = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/valid/images')
valid_masks = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/valid/masks')

print(len(train_images), len(train_masks))

24800 24800


In [3]:
train_data = pd.DataFrame({'image': train_images, 'mask': train_masks})
val_data = pd.DataFrame({'image': valid_images, 'mask': valid_masks})

train_data = shuffle(train_data).reset_index().drop(columns=['index'])
val_data = shuffle(val_data).reset_index().drop(columns=['index'])

In [4]:
X_train = train_data['image']
y_train = train_data['mask']

X_validation = val_data['image']
y_validation = val_data['mask']

In [5]:
SEED = 69
IMG_SIZE = 256,256
BATCH_SIZE = 8

In [6]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CustomDataset(Dataset):
    def __init__(self, image_paths, mask_paths, img_size):
        self.image_paths = list(image_paths)
        self.mask_paths = list(mask_paths)
        self.img_size = img_size

        self.transform_image = transforms.Compose([
            transforms.Resize(self.img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        self.transform_mask = transforms.Compose([
            transforms.Resize(self.img_size, interpolation=Image.NEAREST),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        if idx >= len(self.image_paths):
            raise IndexError(f"Index {idx} out of range for dataset with length {len(self.image_paths)}.")
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        image = Image.open(image_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')

        image = self.transform_image(image)
        mask = self.transform_mask(mask)

        return image, mask


In [7]:
# Assuming image and mask paths are stored in X_data, y_data for training
# and X_validation, y_validation for validation
train_dataset = CustomDataset(X_train, y_train, IMG_SIZE)
validation_dataset = CustomDataset(X_validation, y_validation, IMG_SIZE)

In [8]:
from torch.utils.data import DataLoader
NUM_WORKERS = 4  # Based on your system's capability

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

In [9]:
from typing import Any

In [10]:
class SEModule(nn.Module):
    def __init__(self, channels: int, ratio: int = 8) -> None:
        super(SEModule, self).__init__()

        # Average Pooling for Squeeze
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # Excitation Operation
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // ratio),
            nn.ReLU(inplace=True),
            # nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.Linear(channels // ratio, channels),
            nn.Sigmoid(),
            # nn.Tanh(),
        )

    def forward(self, x: Any) -> Any:
        # Squeeze & Excite Forward Pass
        b, c, _, _ = x.size()

        y = self.avgpool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)

        # For Tanh
        # y_normalized = (y + 1) * 0.5

        return x * y

In [11]:
class ASPPModule(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, dilations: list[int]) -> None:
        super(ASPPModule, self).__init__()

        # Atrous Convolutions
        self.atrous_convs = nn.ModuleList()
        for d in dilations:
            at_conv = nn.Conv2d(
                in_channels, out_channels, kernel_size=3, dilation=d, padding="same", bias=False
            )
            self.atrous_convs.append(at_conv)

        self.batch_norm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.squeeze_excite = SEModule(channels=out_channels)
        # self.leaky_relu = nn.LeakyReLU(0.1)
        self.dropout = nn.Dropout(p=0.5)
        # Upsampling by Bilinear Interpolation
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=16)
        # Global Average Pooling
        self.avgpool = nn.AvgPool2d(kernel_size=(16, 16))
        # 1x1 Convolution
        self.conv1x1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=1, padding="same", bias=False
        )

        # Final 1x1 Convolution
        self.final_conv = nn.Conv2d(
            in_channels=out_channels * (len(dilations) + 2),
            out_channels=out_channels,
            kernel_size=1,
            padding="same",
            bias=False,
        )

    def forward(self, x: Any) -> Any:
        # ASPP Forward Pass

        # 1x1 Convolution
        x1 = self.conv1x1(x)
        x1 = self.batch_norm(x1)
        x1 = self.dropout(x1)
        x1 = self.relu(x1)
        # x1 = self.leaky_relu(x1)
        x1 = self.squeeze_excite(x1)

        # Atrous Convolutions
        atrous_outputs = []
        for at_conv in self.atrous_convs:
            at_output = at_conv(x)
            at_output = self.batch_norm(at_output)
            at_output = self.relu(at_output)
            # at_output = self.leaky_relu(at_output)
            at_output = self.squeeze_excite(at_output)
            atrous_outputs.append(at_output)

        # Global Average Pooling and 1x1 Convolution for global context
        avg_pool = self.avgpool(x)
        avg_pool = self.conv1x1(avg_pool)
        avg_pool = self.batch_norm(avg_pool)
        avg_pool = self.relu(avg_pool)
        # avg_pool = self.leaky_relu(avg_pool)
        avg_pool = self.upsample(avg_pool)
        avg_pool = self.squeeze_excite(avg_pool)

        # Concatenating Dilated Convolutions and Global Average Pooling
        combined_output = torch.cat((x1, *atrous_outputs, avg_pool), dim=1)

        # Final 1x1 Convolution for ASPP Output
        aspp_output = self.final_conv(combined_output)
        aspp_output = self.batch_norm(aspp_output)
        aspp_output = self.relu(aspp_output)
        # aspp_output = self.leaky_relu(aspp_output)
        aspp_output = self.squeeze_excite(aspp_output)

        return aspp_output

In [12]:
class DecoderModule(nn.Module):
    def __init__(self, in_channels: int, out_channels: int) -> None:
        super(DecoderModule, self).__init__()

        # Squeeze and Excite Module
        self.squeeze_excite = SEModule(channels=304)
        self.squeeze_excite2 = SEModule(channels=out_channels)
        self.squeeze_excite3 = SEModule(channels=48)
        # 1x1 Convolution
        self.conv_low = nn.Conv2d(in_channels, 48, kernel_size=1, padding="same", bias=False)
        self.batch_norm = nn.BatchNorm2d(48)
        self.batch_norm2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        # self.leaky_relu = nn.LeakyReLU(0.1)
        self.dropout = nn.Dropout(p=0.5)
        # 3x3 Convolution
        self.final_conv1 = nn.Conv2d(
            in_channels=304, out_channels=256, kernel_size=3, padding="same", bias=False
        )
        # 3x3 Convolution
        self.final_conv2 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, padding="same", bias=False
        )

    def forward(self, x_high: Any, x_low: Any) -> Any:
        # Decoder Forward Pass

        # 1x1 Convolution on Low-Level Features
        x_low = self.conv_low(x_low)
        x_low = self.batch_norm(x_low)
        x_low = self.dropout(x_low)
        x_low = self.relu(x_low)
        # x_low = self.leaky_relu(x_low)
        x_low = self.squeeze_excite3(x_low)

        # Concatenating High-Level and Low-Level Features
        x = torch.cat((x_high, x_low), dim=1)
        x = self.dropout(x)
        x = self.squeeze_excite(x)

        # 3x3 Convolution on Concatenated Feature Map
        x = self.final_conv1(x)
        x = self.batch_norm2(x)
        x = self.relu(x)
        # x = self.leaky_relu(x)
        x = self.squeeze_excite2(x)

        # 3x3 Convolution on Concatenated Feature Map
        x = self.final_conv2(x)
        x = self.batch_norm2(x)
        x = self.relu(x)
        # x = self.leaky_relu(x)
        x = self.squeeze_excite2(x)

        return x

In [13]:
class DeepLabV3Plus(nn.Module):
    def __init__(self, num_classes: int = 1) -> None:
        super(DeepLabV3Plus, self).__init__()
        resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        in_channels = 1024
        out_channels = 256
        # Dilation Rates
        dilations = [6, 12, 18, 24]
        # ASPP Module
        self.aspp = ASPPModule(in_channels, out_channels, dilations)
        # Decoder Module
        self.decoder = DecoderModule(out_channels, out_channels)
        # Upsampling with Bilinear Interpolation
        self.upsample = nn.UpsamplingBilinear2d(scale_factor=4)
        # Dropout
        self.dropout = nn.Dropout(p=0.5)
        # Final 1x1 Convolution
        self.final_conv = nn.Conv2d(out_channels, num_classes, kernel_size=1)
        # Sigmoid Activation for Binary-Seg
        self.sigmoid = nn.Sigmoid()
        # self.tanh = nn.Tanh()
        # Initialize weights
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x: Any) -> Any:
        # DeepLabV3+ Forward Pass
        # Getting Low-Level Features
        x_low = self.backbone[:-3](x)
        # Getting Image Features from Backbone
        x = self.backbone[:-1](x)
        # ASPP forward pass - High-Level Features
        x = self.aspp(x)
        # Upsampling High-Level Features
        x = self.upsample(x)
        x = self.dropout(x)
        # Decoder forward pass - Concatenating Features
        x = self.decoder(x, x_low)
        # Upsampling Concatenated Features from Decoder
        x = self.upsample(x)

        # Final 1x1 Convolution for Binary-Segmentation
        x = self.final_conv(x)
        x = self.sigmoid(x)
        # x = self.tanh(x)

        # For Tanh
        # normalized_x = (x + 1) * 0.5

        return x


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [15]:
model = DeepLabV3Plus(num_classes=1)  # For binary segmentation, num_classes=1

In [16]:
model.to(device)

DeepLabV3Plus(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv

In [17]:
SMOOTH = 1e-8
class DiceLoss(nn.Module):
    def __init__(self) -> None:
        super(DiceLoss, self).__init__()

    def forward(self, pred_mask: Any, true_mask: Any) -> torch.Tensor:
        intersection = torch.sum(pred_mask * true_mask)
        union = torch.sum(pred_mask) + torch.sum(true_mask)

        # Add a small epsilon to the denominator to avoid division by zero
        dice_loss = 1.0 - (2.0 * intersection + SMOOTH) / (union + SMOOTH)
        return dice_loss

In [18]:
model = DeepLabV3Plus(num_classes=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = DiceLoss()

optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-6)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", patience=3, factor=0.1, verbose=True
)

# For Early-Stopping
patience_epochs = 20
no_improvement_epochs = 0

In [19]:
def calculate_metrics(pred_mask: Any, true_mask: Any) -> torch.Tensor:
    pred_mask = pred_mask.float()
    true_mask = true_mask.float()

    intersection = torch.sum(pred_mask * true_mask)
    union = torch.sum((pred_mask + true_mask) > 0.5)

    # Add a small epsilon to the denominator to avoid division by zero
    iou = (intersection + SMOOTH) / (union + SMOOTH)
    dice_coefficient = (2 * intersection + SMOOTH) / (
        torch.sum(pred_mask) + torch.sum(true_mask) + SMOOTH
    )
    pixel_accuracy = torch.sum(pred_mask == true_mask) / true_mask.numel()

    return iou.item(), dice_coefficient.item(), pixel_accuracy.item()

In [20]:
from tqdm import tqdm

In [21]:
num_epochs = 50

In [None]:
for epoch in range(num_epochs):
    # TRAINING
    model.train()
    train_loss = 0.0
    total_iou_train = 0.0
    total_pixel_accuracy_train = 0.0
    total_dice_coefficient_train = 0.0

    train_loader = tqdm(
        train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}", unit="batch"
    )

    current_lr = optimizer.param_groups[0]["lr"]

    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        t_loss = criterion(outputs, masks)

        t_loss.backward()
        optimizer.step()

        train_loss += t_loss.item()

        # Calculating metrics for training
        with torch.no_grad():
            pred_masks = outputs > 0.5
            iou_train, dice_coefficient_train, pixel_accuracy_train = calculate_metrics(
                pred_masks, masks
            )

            total_iou_train += iou_train
            total_dice_coefficient_train += dice_coefficient_train
            total_pixel_accuracy_train += pixel_accuracy_train

        # Displaying metrics in the progress bar description
        train_loader.set_postfix(
            loss=t_loss.item(),
            train_iou=iou_train,
            train_pix_acc=pixel_accuracy_train,
            train_dice_coef=dice_coefficient_train,
            lr=current_lr,
        )

    train_loss /= len(train_loader)
    avg_iou_train = total_iou_train / len(train_loader)
    avg_pixel_accuracy_train = total_pixel_accuracy_train / len(train_loader)
    avg_dice_coefficient_train = total_dice_coefficient_train / len(train_loader)

    # VALIDATION
    model.eval()
    val_loss = 0.0
    total_iou_val = 0.0
    total_pixel_accuracy_val = 0.0
    total_dice_coefficient_val = 0.0

    test_dataloader = tqdm(validation_loader, desc=f"Validation", unit="batch")

    with torch.no_grad():
        for images, masks in test_dataloader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)

            v_loss = criterion(outputs, masks)
            val_loss += v_loss.item()

            # Calculating metrics for Validation
            pred_masks = outputs > 0.5
            iou_val, dice_coefficient_val, pixel_accuracy_val = calculate_metrics(
                pred_masks, masks
            )

            total_iou_val += iou_val
            total_pixel_accuracy_val += pixel_accuracy_val
            total_dice_coefficient_val += dice_coefficient_val

            # Displaying metrics in progress bar description
            test_dataloader.set_postfix(
                val_loss=v_loss.item(),
                val_iou=iou_val,
                val_pix_acc=pixel_accuracy_val,
                val_dice_coef=dice_coefficient_val,
                lr=current_lr,
            )

    val_loss /= len(test_dataloader)
    avg_iou_val = total_iou_val / len(test_dataloader)
    avg_pixel_accuracy_val = total_pixel_accuracy_val / len(test_dataloader)
    avg_dice_coefficient_val = total_dice_coefficient_val / len(test_dataloader)

    scheduler.step(val_loss)

    print(
        f"\nEpoch {epoch + 1}/{num_epochs}\n"
        f"Avg Train Loss: {train_loss:.4f}\n"
        f"Avg Validation Loss: {val_loss:.4f}\n"
        f"Avg IoU Train: {avg_iou_train:.4f}\n"
        f"Avg IoU Val: {avg_iou_val:.4f}\n"
        f"Avg Pix Acc Train: {avg_dice_coefficient_train:.4f}\n"
        f"Avg Pix Acc Val: {avg_pixel_accuracy_val:.4f}\n"
        f"Avg Dice Coeff Train: {avg_dice_coefficient_train:.4f}\n"
        f"Avg Dice Coeff Val: {avg_dice_coefficient_val:.4f}\n"
        f"Current LR: {current_lr}\n"
        f"{'-'*50}"
    )

Epoch 1/50: 100%|██████████| 3100/3100 [05:29<00:00,  9.40batch/s, loss=0.517, lr=0.0001, train_dice_coef=0.483, train_iou=0.319, train_pix_acc=0.809] 
Validation: 100%|██████████| 13/13 [00:00<00:00, 15.59batch/s, lr=0.0001, val_dice_coef=0.606, val_iou=0.435, val_loss=0.393, val_pix_acc=0.839]



Epoch 1/50
Avg Train Loss: 0.4589
Avg Validation Loss: 0.4081
Avg IoU Train: 0.3807
Avg IoU Val: 0.4283
Avg Pix Acc Train: 0.5415
Avg Pix Acc Val: 0.8325
Avg Dice Coeff Train: 0.5415
Avg Dice Coeff Val: 0.5932
Current LR: 0.0001
--------------------------------------------------


Epoch 2/50: 100%|██████████| 3100/3100 [07:33<00:00,  6.84batch/s, loss=0.401, lr=0.0001, train_dice_coef=0.6, train_iou=0.429, train_pix_acc=0.874]   
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.25batch/s, lr=0.0001, val_dice_coef=0.699, val_iou=0.538, val_loss=0.305, val_pix_acc=0.87] 



Epoch 2/50
Avg Train Loss: 0.3515
Avg Validation Loss: 0.3409
Avg IoU Train: 0.4910
Avg IoU Val: 0.5040
Avg Pix Acc Train: 0.6489
Avg Pix Acc Val: 0.8491
Avg Dice Coeff Train: 0.6489
Avg Dice Coeff Val: 0.6630
Current LR: 0.0001
--------------------------------------------------


Epoch 3/50: 100%|██████████| 3100/3100 [10:45<00:00,  4.81batch/s, loss=0.149, lr=0.0001, train_dice_coef=0.853, train_iou=0.743, train_pix_acc=0.944] 
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.36batch/s, lr=0.0001, val_dice_coef=0.737, val_iou=0.584, val_loss=0.268, val_pix_acc=0.898]



Epoch 3/50
Avg Train Loss: 0.2554
Avg Validation Loss: 0.2420
Avg IoU Train: 0.6057
Avg IoU Val: 0.6221
Avg Pix Acc Train: 0.7453
Avg Pix Acc Val: 0.9049
Avg Dice Coeff Train: 0.7453
Avg Dice Coeff Val: 0.7624
Current LR: 0.0001
--------------------------------------------------


Epoch 4/50: 100%|██████████| 3100/3100 [11:00<00:00,  4.69batch/s, loss=0.283, lr=0.0001, train_dice_coef=0.718, train_iou=0.561, train_pix_acc=0.885] 
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.33batch/s, lr=0.0001, val_dice_coef=0.814, val_iou=0.686, val_loss=0.189, val_pix_acc=0.926]



Epoch 4/50
Avg Train Loss: 0.2003
Avg Validation Loss: 0.2102
Avg IoU Train: 0.6781
Avg IoU Val: 0.6678
Avg Pix Acc Train: 0.8006
Avg Pix Acc Val: 0.9196
Avg Dice Coeff Train: 0.8006
Avg Dice Coeff Val: 0.7944
Current LR: 0.0001
--------------------------------------------------


Epoch 5/50: 100%|██████████| 3100/3100 [11:06<00:00,  4.65batch/s, loss=0.182, lr=0.0001, train_dice_coef=0.819, train_iou=0.694, train_pix_acc=0.919] 
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.31batch/s, lr=0.0001, val_dice_coef=0.797, val_iou=0.663, val_loss=0.212, val_pix_acc=0.923]



Epoch 5/50
Avg Train Loss: 0.1651
Avg Validation Loss: 0.2277
Avg IoU Train: 0.7270
Avg IoU Val: 0.6419
Avg Pix Acc Train: 0.8358
Avg Pix Acc Val: 0.9203
Avg Dice Coeff Train: 0.8358
Avg Dice Coeff Val: 0.7770
Current LR: 0.0001
--------------------------------------------------


Epoch 6/50: 100%|██████████| 3100/3100 [10:59<00:00,  4.70batch/s, loss=0.117, lr=0.0001, train_dice_coef=0.884, train_iou=0.792, train_pix_acc=0.922] 
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.35batch/s, lr=0.0001, val_dice_coef=0.855, val_iou=0.748, val_loss=0.145, val_pix_acc=0.942]



Epoch 6/50
Avg Train Loss: 0.1458
Avg Validation Loss: 0.2033
Avg IoU Train: 0.7554
Avg IoU Val: 0.6737
Avg Pix Acc Train: 0.8551
Avg Pix Acc Val: 0.9253
Avg Dice Coeff Train: 0.8551
Avg Dice Coeff Val: 0.7986
Current LR: 0.0001
--------------------------------------------------


Epoch 7/50: 100%|██████████| 3100/3100 [11:00<00:00,  4.70batch/s, loss=0.0602, lr=0.0001, train_dice_coef=0.94, train_iou=0.887, train_pix_acc=0.956] 
Validation: 100%|██████████| 13/13 [00:01<00:00, 11.45batch/s, lr=0.0001, val_dice_coef=0.839, val_iou=0.723, val_loss=0.162, val_pix_acc=0.941]



Epoch 7/50
Avg Train Loss: 0.1272
Avg Validation Loss: 0.2840
Avg IoU Train: 0.7835
Avg IoU Val: 0.5693
Avg Pix Acc Train: 0.8737
Avg Pix Acc Val: 0.9098
Avg Dice Coeff Train: 0.8737
Avg Dice Coeff Val: 0.7154
Current LR: 0.0001
--------------------------------------------------


Epoch 8/50:  55%|█████▍    | 1690/3100 [06:00<04:59,  4.70batch/s, loss=0.0874, lr=0.0001, train_dice_coef=0.913, train_iou=0.841, train_pix_acc=0.933]

In [None]:
torch.save(model.state_dict(), 'deeplabv3plus.pth')

In [None]:
# model = DeepLabV3Plus(num_classes=1)
# model.load_state_dict(torch.load('deeplabv3plus.pth'))

In [None]:
test_images = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/test/images')
test_masks = create_images_list('./datasets/kvasir_segmentation_dataset/kvasir_segmentation_dataset/test/masks')

In [None]:
test_data = pd.DataFrame({'image': test_images, 'mask': test_masks})
test_data = shuffle(test_data).reset_index().drop(columns=['index'])

In [None]:
X_test = test_data['image']
y_test = test_data['mask']

In [None]:
test_dataset = CustomDataset(X_test, y_test, IMG_SIZE)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

In [None]:
test_loader = tqdm(test_loader, desc="Evaluation", unit="image")

In [None]:
# Additional imports
from sklearn.metrics import jaccard_score

def evaluate_model(test_loader, model, criterion, device):
    model.eval()
    total_loss = 0.0
    total_iou = 0.0
    total_dice = 0.0
    total_accuracy = 0.0
    num_samples = 0

    with torch.no_grad():
        for images, masks in test_loader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)

            loss = criterion(outputs, masks)
            total_loss += loss.item()

            # Threshold outputs to create binary mask
            preds = outputs > 0.5

            # Calculate metrics
            iou, dice_coefficient, pixel_accuracy = calculate_metrics(preds, masks)

            total_iou += iou
            total_dice += dice_coefficient
            total_accuracy += pixel_accuracy
            num_samples += 1

    avg_loss = total_loss / num_samples
    avg_iou = total_iou / num_samples
    avg_dice = total_dice / num_samples
    avg_accuracy = total_accuracy / num_samples

    return avg_loss, avg_iou, avg_dice, avg_accuracy

# Evaluate the model on the test set
avg_loss, avg_iou, avg_dice, avg_accuracy = evaluate_model(test_loader, model, criterion, device)

print(f"Test Loss: {avg_loss:.4f}")
print(f"Test IoU: {avg_iou:.4f}")
print(f"Test Dice Coefficient: {avg_dice:.4f}")
print(f"Test Pixel Accuracy: {avg_accuracy:.4f}")


In [None]:
def save_overlay_image(img_path: str, mask_path: str, prediction: Any, overlay_path: str) -> None:
    image = cv2.imread(img_path, cv2.IMREAD_COLOR)
    resized_image = cv2.resize(image, (256,256), interpolation = cv2.INTER_AREA)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    resized_mask = cv2.resize(mask, (256,256), interpolation = cv2.INTER_AREA)
    line = np.ones((256, 10, 3)) * 128

    resized_mask = np.expand_dims(resized_mask, axis=-1)
    resized_mask = np.concatenate([resized_mask, resized_mask, resized_mask], axis=-1)

    prediction = np.expand_dims(prediction, axis=-1)
    prediction = np.concatenate([prediction, prediction, prediction], axis=-1)

    overlay = np.multiply(resized_image, prediction)
    prediction = prediction * 255

    final_img = np.concatenate([resized_image, line, resized_mask, line, prediction, line, overlay], axis=1)

    cv2.imwrite(overlay_path, final_img)


In [None]:
output_dir = './output'

In [None]:
with torch.no_grad():
    for i, (image, _) in enumerate(test_loader):
        image = image.to(device)

        output = model(image)
        prediction = output.cpu().numpy()[0, 0]

        img_path = test_dataset.image_paths[i]
        mask_path = test_dataset.mask_paths[i]

        output_img_name = img_path.split('/')[-1][:-4]

        output_img_path = os.path.join(output_dir, f"output_{output_img_name}.png")

        save_overlay_image(img_path, mask_path, prediction, output_img_path)