In [7]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torchvision.transforms as transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

import numpy as np
import cv2
import os
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch.optim as optim


In [8]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [9]:
import os


In [10]:
class SegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        """
        Args:
            image_dir (str): Path to frames directory.
            mask_dir (str): Path to masks directory.
            transform (albumentations.Compose, optional): Transform to apply.
        """
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform

        # Sort the files by name to ensure they match up correctly
        self.images = sorted(os.listdir(image_dir))  # Sorting image filenames
        self.masks = sorted(os.listdir(mask_dir))    # Sorting mask filenames

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        # Load both in grayscale
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # Convert to float32 and normalize to [0, 1]
        image = image.astype(np.float32) / 255.0  # Normalize to [0, 1]
        mask = mask.astype(np.float32) / 255.0    # Normalize to [0, 1]
        
        # Expand dimensions to (H, W, 1) for albumentations
        image = np.expand_dims(image, axis=-1)  # (H, W, 1)
        mask = np.expand_dims(mask, axis=-1)    # (H, W, 1)

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented["image"]  # Tensor: (1, 224, 224)
            augmented = self.transform(image=mask)
            mask = augmented["image"]    # Tensor: (224, 224)

        # Ensure mask has the shape (1, 224, 224) for consistency
        if len(mask.shape) == 2:  # If mask is (224, 224)
            mask = mask.unsqueeze(0)  # Convert to (1, 224, 224)

        return image, mask  # Shape: (1, 224, 224), (1, 224, 224)


In [11]:
image_dir = "/kaggle/input/segmentation/data1/encoder_directory_new"  
mask_dir = "/kaggle/input/segmentation/data1/decoder_directory_new"    

transform = A.Compose([
    A.Resize(224, 224),
    ToTensorV2()  
])

dataset = SegmentationDataset(image_dir=image_dir, mask_dir=mask_dir, transform=transform)

print(f"Dataset size: {len(dataset)}")


Dataset size: 3526


In [12]:
import torch
from torch.utils.data import random_split, DataLoader

# Define the splits
train_size = int(0.75 * len(dataset))  # 75% for training
val_size = int(0.10 * len(dataset))   # 10% for validation
test_size = len(dataset) - train_size - val_size  # 15% for testing

# Perform the split
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Example to check the data loader sizes
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")


Training set size: 2644
Validation set size: 352
Test set size: 530


In [None]:
# Import the model class (ensure the model code is in scope or imported)
segmentation_model = AttentionUNet(img_ch=1, output_ch=1)



weights_path = '/kaggle/input/seg1/other/default/1/checkpoint_epoch_20.pth'  # Update with correct path

state_dict = torch.load(weights_path, map_location=device)

segmentation_model.load_state_dict(state_dict)

# Set to evaluation mode
segmentation_model.eval()

print("Segmentation model weights loaded successfully.")



In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalTverskyLoss(nn.Module):
    def __init__(self, alpha=0.7, beta=0.3, gamma=0.75, smooth=1e-6):
        """
        alpha: controls penalty for false positives
        beta: controls penalty for false negatives
        gamma: focusing parameter to focus on hard examples
        smooth: smoothing constant to avoid division by zero
        """
        super(FocalTverskyLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.smooth = smooth

    def forward(self, preds, targets):
        """
        preds: predicted mask (logits or probabilities) - shape [B, 1, H, W]
        targets: ground truth mask - shape [B, 1, H, W]
        """
        # Apply sigmoid if preds are logits
        preds = torch.sigmoid(preds)

        # Flatten the tensors
        preds = preds.view(-1)
        targets = targets.view(-1)

        # Calculate Tversky components
        TP = (preds * targets).sum()
        FP = ((1 - targets) * preds).sum()
        FN = (targets * (1 - preds)).sum()

        # Tversky index
        Tversky = (TP + self.smooth) / (TP + self.alpha * FP + self.beta * FN + self.smooth)

        # Focal Tversky Loss
        loss = (1 - Tversky) ** self.gamma

        return loss


In [16]:
criterion = FocalTverskyLoss(alpha=0.7, beta=0.3, gamma=0.75)  # Your custom loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [17]:


# Training Loop
num_epochs = 40
print("starting")
for epoch in range(num_epochs):
    print(f"starting {epoch}")
    model.train()  # Set the model to training mode
    running_loss = 0.0
    
    for batch_idx, (images, masks) in enumerate(train_loader):  # Loading batches of images and masks
        images, masks = images.to(device), masks.to(device)  # Move to device (GPU/CPU)
        
        optimizer.zero_grad()  # Clear gradients from the previous step
        
        # Forward pass: Get predictions
        outputs = model(images)
        
        # Calculate loss
        loss = criterion(outputs, masks)
        
        # Backward pass: Calculate gradients
        loss.backward()
        
        # Update weights
        optimizer.step()
        
        running_loss += loss.item()
        
    avg_train_loss = running_loss / len(train_loader)  # Average loss for this epoch
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_train_loss:.4f}")
    
    # Validation after each epoch
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # No gradient calculation during validation
        val_loss = 0.0
        for images, masks in val_loader:  # Iterate through validation data
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()
        
        avg_val_loss = val_loss / len(val_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")
    
    # Save model checkpoint after every 10 epochs
    if (epoch + 1) % 10 == 0:
        checkpoint_path = f"checkpoint_epoch_{epoch + 1}.pth"
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Checkpoint saved at epoch {epoch + 1}")
    
    # Optionally, you can save the final model at the end of training
    if (epoch + 1) == num_epochs:
        final_model_path = "final_model.pth"
        torch.save(model.state_dict(), final_model_path)
        print(f"Final model saved at epoch {epoch + 1}")


starting
starting 0
Epoch [1/40], Loss: 0.8650
Validation Loss: 0.8443
starting 1
Epoch [2/40], Loss: 0.8314
Validation Loss: 0.8185
starting 2
Epoch [3/40], Loss: 0.8066
Validation Loss: 0.7973
starting 3
Epoch [4/40], Loss: 0.7766
Validation Loss: 0.7586
starting 4
Epoch [5/40], Loss: 0.7403
Validation Loss: 0.7225
starting 5
Epoch [6/40], Loss: 0.6963
Validation Loss: 0.6779
starting 6
Epoch [7/40], Loss: 0.6453
Validation Loss: 0.6147
starting 7
Epoch [8/40], Loss: 0.5895
Validation Loss: 0.5554
starting 8
Epoch [9/40], Loss: 0.5308
Validation Loss: 0.5015
starting 9
Epoch [10/40], Loss: 0.4697
Validation Loss: 0.4346
Checkpoint saved at epoch 10
starting 10
Epoch [11/40], Loss: 0.4123
Validation Loss: 0.3903
starting 11
Epoch [12/40], Loss: 0.3616
Validation Loss: 0.3367
starting 12
Epoch [13/40], Loss: 0.3177
Validation Loss: 0.3070
starting 13
Epoch [14/40], Loss: 0.2787
Validation Loss: 0.2923
starting 14
Epoch [15/40], Loss: 0.2471
Validation Loss: 0.2459
starting 15
Epoch [16

In [27]:
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
from torchvision import transforms

# 1. Load the trained model
model = AttentionUNet(img_ch=1, output_ch=1)
model.load_state_dict(torch.load("/kaggle/working/checkpoint_epoch_30.pth", map_location=torch.device('cpu')))
model.eval()
model.to(device)


  model.load_state_dict(torch.load("/kaggle/working/checkpoint_epoch_30.pth", map_location=torch.device('cpu')))


AttentionUNet(
  (MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv1): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (Conv2): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)

In [29]:
import torch

def calculate_metrics(model, test_loader, device="cuda"):
    model.eval()
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    total_pixels = 0
    correct_pixels = 0
    
    with torch.no_grad():
        for images, masks in test_loader:
            images = images.to(device)
            masks = masks.to(device)
            
            outputs = model(images)
            preds = torch.sigmoid(outputs) > 0.5  # Binary thresholding
            
            # Flatten the predictions and the ground truth masks for comparison
            preds_flat = preds.view(-1).bool()
            masks_flat = masks.view(-1).bool()
            
            # Calculate TP, FP, FN
            true_positives += (preds_flat & masks_flat).sum().item()
            false_positives += ((preds_flat) & (~masks_flat)).sum().item()
            false_negatives += ((~preds_flat) & (masks_flat)).sum().item()
            correct_pixels += (preds_flat == masks_flat).sum().item()
            total_pixels += masks_flat.size(0)
    
    # Calculating all metrics
    dice_coefficient = 2 * true_positives / (2 * true_positives + false_positives + false_negatives) if (2 * true_positives + false_positives + false_negatives) != 0 else 0
    iou = true_positives / (true_positives + false_positives + false_negatives) if (true_positives + false_positives + false_negatives) != 0 else 0
    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) != 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) != 0 else 0
    accuracy = correct_pixels / total_pixels * 100 if total_pixels != 0 else 0

    # Print all metrics
    print(f"Dice Coefficient: {dice_coefficient:.4f}")
    print(f"IoU: {iou:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall/Sensitivity: {recall:.4f}")
    print(f"Pixel-wise Accuracy: {accuracy:.2f}%")

# Usage
calculate_metrics(model, test_loader, device="cuda")


Dice Coefficient: 0.9574
IoU: 0.9184
Precision: 0.9835
Recall/Sensitivity: 0.9327
Pixel-wise Accuracy: 99.65%
