In [4]:
# ============================================================================
# IMPORTS AND SETUP
# ============================================================================

import os
import random
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as TF
from PIL import Image
from model import UNet
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# Seed every random number generator the notebook can touch so that a
# Restart & Run All reproduces the same shuffling, augmentation and weights.
SEED = 1337
random.seed(SEED)      # Python's own RNG (used by some augmentation libraries)
np.random.seed(SEED)   # NumPy's legacy global RNG
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Seed all visible GPUs, not only the current one.
    torch.cuda.manual_seed_all(SEED)

# Prefer the GPU when one is available; everything else falls back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [5]:
# ============================================================================
# CONFIGURATION (Updated for Task 2)
# ============================================================================
# File names of the text lists that define the train / validation split.
train_file, val_file = 'train.txt', 'val.txt'

class Config:
    """Static settings for the Task 2 segmentation experiment.

    All values are class attributes, so they can be read either from the
    class itself or from the module-level ``config`` instance.
    """

    # Dataset paths
    # Joined with os.path.join so the separator matches the host OS; a
    # hard-coded backslash is treated as part of the file name on Linux/macOS.
    DATASET_PATH = os.path.join("data", "kvasir-seg")
    IMAGE_DIR = "images"   # sub-directory of DATASET_PATH holding the images
    MASK_DIR = "masks"     # sub-directory of DATASET_PATH holding the masks

    # Experiment parameters
    # Intended side length (pixels) of the square model input.
    TARGET_SIZE = 256

    # Training parameters
    BATCH_SIZE = 8
    NUM_EPOCHS = 25
    LEARNING_RATE = 1e-4

    # Early stopping (consumed by the training loop, which is not in this cell)
    EARLY_STOPPING_PATIENCE = 5
    # LR scheduler (consumed by the training loop, which is not in this cell)
    SCHEDULER_PATIENCE = 3
    SCHEDULER_FACTOR = 0.1

    # Model parameters
    IN_CHANNELS = 3
    OUT_CHANNELS = 1
    FEATURES = [64, 128, 256, 512]


config = Config()

In [6]:
# ============================================================================
# DATASET CLASS (Updated for Task 2 with Albumentations)
# ============================================================================

class KvasirDataset(Dataset):
    """Image/mask pairs loaded from disk and served as float tensors.

    Each item is read with PIL, the mask is binarised, and both arrays are
    passed through an Albumentations pipeline (resize, plus optional random
    augmentations) before being converted to the tensors the model and the
    loss expect.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, index-aligned with
            ``image_paths``.
        target_size: Side length in pixels of the square output.
        use_augmentations: When True, random flips, rotation and colour
            jitter are applied after the resize (training). When False only
            the resize is applied (validation).

    Raises:
        ValueError: If ``image_paths`` and ``mask_paths`` differ in length.
    """

    def __init__(self, image_paths, mask_paths, target_size=256, use_augmentations=False):
        # Validate first: a length mismatch would otherwise surface only as
        # an IndexError (or silently wrong pairs) deep inside training.
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and mask_paths "
                f"({len(mask_paths)}) must have the same length."
            )

        self.image_paths = image_paths
        self.mask_paths = mask_paths

        # Every pipeline starts with the same resize ...
        steps = [
            A.Resize(height=target_size, width=target_size,
                     interpolation=cv2.INTER_LINEAR),
        ]

        # ... the training pipeline adds random augmentations ...
        if use_augmentations:
            steps.extend([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.Rotate(limit=30, interpolation=cv2.INTER_LINEAR,
                         border_mode=cv2.BORDER_CONSTANT, p=0.5),
                A.ColorJitter(brightness=0.3, contrast=0.3,
                              saturation=0.3, hue=0.1, p=0.5),
            ])

        # ... and every pipeline ends with the tensor conversion.
        # ToTensorV2 only reorders the image to (C, H, W) and wraps the arrays
        # as tensors; it does NOT rescale values or change dtype, so the
        # float conversion happens in _to_float_tensors.
        steps.append(ToTensorV2())
        self.transform = A.Compose(steps)

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_paths)

    @staticmethod
    def _to_float_tensors(image_tensor, mask_tensor):
        """Convert pipeline output to the dtypes/shapes used for training.

        Args:
            image_tensor: (C, H, W) tensor; integer tensors are assumed to
                hold 0-255 pixel values.
            mask_tensor: (H, W) tensor of 0/1 labels.

        Returns:
            Tuple ``(image, mask)``: ``image`` is floating point (integer
            input is scaled to [0, 1]); ``mask`` is float with shape
            (1, H, W).
        """
        # Leave already-float images alone so a normalisation step added to
        # the pipeline later is not scaled a second time.
        if not torch.is_floating_point(image_tensor):
            image_tensor = image_tensor.float().div(255.0)
        # (H, W) -> (1, H, W), as float for the loss function.
        mask_tensor = mask_tensor.unsqueeze(0).float()
        return image_tensor, mask_tensor

    def __getitem__(self, idx):
        """Load, transform and return the ``idx``-th ``(image, mask)`` pair.

        Returns:
            Tuple of a float image tensor (C, H, W) scaled to [0, 1] and a
            float mask tensor (1, H, W) containing 0 (background) or
            1 (polyp).
        """
        # Context managers close the underlying files deterministically,
        # which matters when many DataLoader workers keep files open.
        with Image.open(self.image_paths[idx]) as img:
            image = np.array(img.convert('RGB'))
        with Image.open(self.mask_paths[idx]) as msk:
            mask = np.array(msk.convert('L'))

        # Binarise *before* the transforms so the pipeline sees class
        # indices: 0 for background, 1 for polyp.
        mask = (mask > 128).astype(np.uint8)

        # 'image' and 'mask' are the keys Albumentations uses to keep the
        # geometric transforms of both arrays in sync.
        augmented = self.transform(image=image, mask=mask)

        return self._to_float_tensors(augmented['image'], augmented['mask'])

In [7]:
# ============================================================================
# PATH COLLECTION AND VALIDATION
# ============================================================================

import os
from pathlib import Path

# Fail fast when a split list is missing. The file names come from the
# configuration cell (train_file / val_file) so they are defined in one place.
missing_split_files = [name for name in (train_file, val_file) if not os.path.exists(name)]
if missing_split_files:
    raise FileNotFoundError(f"Split list file(s) missing: {', '.join(missing_split_files)}")

base_path = Path(config.DATASET_PATH)
image_paths = sorted((base_path / config.IMAGE_DIR).glob('*.jpg'))
mask_paths = sorted((base_path / config.MASK_DIR).glob('*.jpg'))

if not image_paths:
    raise FileNotFoundError(f"No .jpg images found in {base_path / config.IMAGE_DIR}")
if len(image_paths) != len(mask_paths):
    raise ValueError("Mismatch between number of images and masks.")
# Equal counts are not enough: the lists below are filtered independently, so
# every image must have a mask with the same stem at the same sorted position.
if [p.stem for p in image_paths] != [p.stem for p in mask_paths]:
    raise ValueError("Image and mask file names do not match one-to-one.")


def _read_stems(list_path):
    """Return the set of non-empty, whitespace-stripped lines of ``list_path``."""
    with open(list_path, 'r') as split_list:
        return {line.strip() for line in split_list if line.strip()}


# Read train/val lists
train_stems = _read_stems(train_file)
val_stems = _read_stems(val_file)

# Split dataset according to txt files
train_images = [p for p in image_paths if p.stem in train_stems]
train_masks = [p for p in mask_paths if p.stem in train_stems]
val_images = [p for p in image_paths if p.stem in val_stems]
val_masks = [p for p in mask_paths if p.stem in val_stems]

print(f"Train: {len(train_images)} images, Val: {len(val_images)} images")


Train: 700 images, Val: 300 images


In [8]:
# ============================================================================
# MODEL CALL
# ============================================================================

def test_model_call():
    """Smoke-test the U-Net: build it, run one random batch, print the shapes.

    Instantiates ``UNet`` with 3 input and 1 output channel on ``device``,
    feeds it a single random 256x256 RGB-sized tensor without tracking
    gradients, and prints the input and output shapes.
    """
    net = UNet(in_channels=3, out_channels=1)
    net = net.to(device)
    print("Model created successfully!")

    # One random sample, generated on the CPU and then moved to the device
    sample = torch.randn(1, 3, 256, 256)
    sample = sample.to(device)
    print(f"Input shape: {sample.shape}")

    # Inference only, so no autograd graph is needed
    with torch.no_grad():
        prediction = net(sample)
    print(f"Output shape: {prediction.shape}")


test_model_call()

Model created successfully!
Input shape: torch.Size([1, 3, 256, 256])
Output shape: torch.Size([1, 1, 256, 256])


In [9]:
class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> batch-norm -> ReLU stages.

    The first stage maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``. With ``kernel_size=3`` and ``padding=1`` the spatial
    size of the input is preserved.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both stages.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        stages = []
        stage_input = in_channels
        for _ in range(2):
            # The convolutions carry no bias term; the BatchNorm layer that
            # follows each one has its own learnable shift.
            stages.append(nn.Conv2d(stage_input, out_channels,
                                    kernel_size=3, padding=1, bias=False))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.ReLU(inplace=True))
            stage_input = out_channels

        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to ``x`` and return the result."""
        return self.double_conv(x)

In [None]:
class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        """Build the encoder, bottleneck, decoder and output head.

        Args:
            in_channels: Number of channels of the input image.
            out_channels: Number of channels of the final 1x1 convolution.
            features: Channel width of each encoder level, shallowest first.
                The decoder mirrors these widths in reverse order.

        Raises:
            ValueError: If ``features`` is empty.
        """
        super().__init__()

        # Work on a private list so any sequence type is accepted and the
        # (immutable) default is never shared or mutated.
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one channel width.")

        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder: one DoubleConv per level; each level's output width is the
        # next level's input width.
        for feature in features:
            self.downs.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Bottleneck doubles the deepest encoder width.
        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)

        # Decoder: for each level (deepest first) a transposed convolution
        # that halves the channels and doubles the resolution, followed by a
        # DoubleConv. The DoubleConv takes ``feature * 2`` channels,
        # presumably because forward concatenates the upsampled map with the
        # matching skip connection (forward is not fully visible here).
        for feature in reversed(features):
            self.ups.append(nn.ConvTranspose2d(feature * 2, feature, kernel_size=2, stride=2))
            self.ups.append(DoubleConv(feature * 2, feature))

        # 1x1 convolution from the shallowest decoder width to the outputs.
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)
            

                 