Task 1: Impact of image resolution on the final outcome

Task 1: Impact of Image Resolution on U-Net Segmentation Performance
Dataset: Kvasir-SEG (Polyp Segmentation)
Workflow: Original Image -> Scale to [512, 256, 128, 64] -> Rescale to 256x256 -> U-Net

This notebook investigates how different input resolutions affect segmentation quality.

Metrics reported: accuracy, IoU, F1, Dice, MCC, precision, and sensitivity (recall).

In [None]:
# =========== IGNORE =========================================================
# shuffle should be true for train, false for validation
# use semi-transparent masks when printing images with mask overlay
# visual representations of predictions alongside metrics (numbers). not separate
# paper should include tables, images, curves
# preprocess mask? ToTensor yields continuous values in [0, 1], not binary {0, 1} (bad) -> threshold the mask after conversion
# create model in separate .py, reuse in other tasks
# ============================================================================

In [None]:
# ============================================================================
# IMPORTS AND SETUP
# ============================================================================

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split
import pandas as pd 
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms.functional as TF
from PIL import Image
from model import UNet

# Reproducibility: a single seed shared by NumPy and PyTorch
SEED = 1337
np.random.seed(SEED)
torch.manual_seed(SEED)

# Choose the compute device; the CUDA generator is seeded only when a GPU is present
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Using device: {device}")


Using device: cuda


In [11]:
# ============================================================================
# CONFIGURATION
# ============================================================================
# Names of the text files that list the training and validation samples,
# given relative to the notebook's working directory.
# NOTE(review): presumably one image file stem per line -- verify against the cell that reads them.
train_file = 'train.txt'
val_file = 'val.txt'

class Config:
    """Central, read-only settings for the resolution experiment.

    All values are class-level constants; the notebook instantiates the class
    once and reads the attributes from that instance.
    """

    # Dataset paths
    # Joined with os.path.join so the separator matches the host OS. The former
    # literal "data\kvasir-seg" only resolved on Windows and depended on "\k"
    # not being a recognised escape sequence.
    DATASET_PATH = os.path.join("data", "kvasir-seg")
    IMAGE_DIR = "images"  # sub-directory of DATASET_PATH holding the images
    MASK_DIR = "masks"    # sub-directory of DATASET_PATH holding the masks

    # Experiment parameters
    RESOLUTIONS = [512, 256, 128, 64]  # intermediate resolutions under test
    TARGET_SIZE = 256                  # side length every sample is rescaled to

    # Training parameters
    BATCH_SIZE = 8
    NUM_EPOCHS = 25
    LEARNING_RATE = 1e-4

    # Model parameters
    IN_CHANNELS = 3
    OUT_CHANNELS = 1
    # NOTE(review): presumably the U-Net channel widths per level -- confirm against model.py
    FEATURES = [64, 128, 256, 512]

    # Output directory name for this task
    RESULTS_DIR = "results_task1"
    
config = Config()
# Create the results directory up front; exist_ok=True makes re-running this cell
# harmless when the directory already exists.
# NOTE(review): presumably later cells write their outputs here -- confirm before removing.
os.makedirs(config.RESULTS_DIR, exist_ok=True)


In [9]:
# ============================================================================
# DATASET CLASS
# ============================================================================

class KvasirDataset(Dataset):
    """Image/mask pairs degraded to a test resolution and rescaled to a fixed size.

    Every sample is first resized to ``resolution`` x ``resolution`` (this is
    where information is lost) and then resized to ``target_size`` x
    ``target_size``, so the tensors have the same size for every resolution.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths, index-aligned with
            ``image_paths`` (``mask_paths[i]`` belongs to ``image_paths[i]``).
        resolution: Side length of the intermediate, degraded image.
        target_size: Side length of the returned tensors.

    Raises:
        ValueError: If ``image_paths`` and ``mask_paths`` differ in length.
    """

    def __init__(self, image_paths, mask_paths, resolution, target_size=256):
        # Pairing is purely positional, so a length mismatch would either
        # mispair samples or fail late inside a DataLoader worker.
        if len(image_paths) != len(mask_paths):
            raise ValueError(
                f"image_paths ({len(image_paths)}) and mask_paths "
                f"({len(mask_paths)}) must have the same length."
            )
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.resolution = resolution
        self.target_size = target_size

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.image_paths)

    def _degrade(self, pil_image, interpolation):
        """Resize to the test resolution, then to the target size."""
        low_res = TF.resize(pil_image, (self.resolution, self.resolution),
                            interpolation=interpolation)
        return TF.resize(low_res, (self.target_size, self.target_size),
                         interpolation=interpolation)

    def __getitem__(self, idx):
        """Load, degrade and tensorise the pair at position ``idx``.

        Returns:
            ``(image, mask)``: ``image`` is the RGB image converted with
            ``to_tensor``; ``mask`` is the single-channel mask binarised to
            0.0 / 1.0.
        """
        image = Image.open(self.image_paths[idx]).convert('RGB')
        mask = Image.open(self.mask_paths[idx]).convert('L')

        # Bilinear for the image; nearest-neighbour for the mask so that
        # resizing never blends foreground and background labels.
        image = self._degrade(image, TF.InterpolationMode.BILINEAR)
        mask = self._degrade(mask, TF.InterpolationMode.NEAREST)

        image = TF.to_tensor(image)
        # to_tensor scales to [0, 1]; threshold to obtain a strictly binary mask.
        mask = (TF.to_tensor(mask) > 0.5).float()

        return image, mask

In [12]:
# ============================================================================
# PATH COLLECTION AND VALIDATION
# ============================================================================

import os
from pathlib import Path

# The split lists are named once in the configuration cell (train_file / val_file);
# fail fast if either one is missing.
if not os.path.exists(train_file) or not os.path.exists(val_file):
    raise FileNotFoundError(f'{train_file} or {val_file} missing.')

base_path = Path(config.DATASET_PATH)
image_paths = sorted((base_path / config.IMAGE_DIR).glob('*.jpg'))
mask_paths = sorted((base_path / config.MASK_DIR).glob('*.jpg'))

if len(image_paths) != len(mask_paths):
    raise ValueError("Mismatch between number of images and masks.")

# Equal counts do not guarantee matching names, so pair every image with the
# mask of the same stem explicitly instead of relying on list positions.
mask_by_stem = {p.stem: p for p in mask_paths}
images_without_mask = [p.name for p in image_paths if p.stem not in mask_by_stem]
if images_without_mask:
    raise ValueError(
        f"{len(images_without_mask)} image(s) have no mask with the same name, "
        f"e.g. {images_without_mask[:5]}"
    )

# Read train/val lists (one stem per line; blank lines are skipped)
with open(train_file, 'r') as f:
    train_stems = {line.strip() for line in f if line.strip()}
with open(val_file, 'r') as f:
    val_stems = {line.strip() for line in f if line.strip()}

# A sample present in both lists would leak training data into validation.
shared_stems = train_stems & val_stems
if shared_stems:
    raise ValueError(
        f"{len(shared_stems)} sample(s) appear in both {train_file} and {val_file}, "
        f"e.g. {sorted(shared_stems)[:5]}"
    )

# Split dataset according to txt files; masks are looked up per image so that
# train_masks[i] / val_masks[i] always belong to train_images[i] / val_images[i].
train_images = [p for p in image_paths if p.stem in train_stems]
train_masks = [mask_by_stem[p.stem] for p in train_images]
val_images = [p for p in image_paths if p.stem in val_stems]
val_masks = [mask_by_stem[p.stem] for p in val_images]

print(f"Train: {len(train_images)} images, Val: {len(val_images)} images")


Train: 700 images, Val: 300 images


In [None]:
# ============================================================================
# MODEL CALL
# ============================================================================

def test_model_call(in_channels=3, out_channels=1, image_size=256):
    """Smoke-test the U-Net: build it, run one forward pass, check the output shape.

    Args:
        in_channels: Number of channels of the dummy input.
        out_channels: Number of channels the model is asked to produce.
        image_size: Side length of the square dummy input.

    Raises:
        AssertionError: If the output shape is not
            ``(1, out_channels, image_size, image_size)``.
    """
    model = UNet(in_channels=in_channels, out_channels=out_channels).to(device)
    model.eval()  # pure inference pass: keep layers out of training mode
    print("Model created successfully!")

    # Create a dummy input tensor (batch of one)
    dummy_input = torch.randn(1, in_channels, image_size, image_size).to(device)
    print(f"Input shape: {dummy_input.shape}")

    # Forward pass without building an autograd graph
    with torch.no_grad():
        output = model(dummy_input)
    print(f"Output shape: {output.shape}")

    # A segmentation output must match the input's spatial size.
    expected_shape = (1, out_channels, image_size, image_size)
    assert tuple(output.shape) == expected_shape, (
        f"Unexpected output shape {tuple(output.shape)}, expected {expected_shape}"
    )

# Take the dimensions from the shared configuration instead of repeating literals
test_model_call(config.IN_CHANNELS, config.OUT_CHANNELS, config.TARGET_SIZE)

Model created successfully!
Input shape: torch.Size([1, 3, 256, 256])
Output shape: torch.Size([1, 1, 256, 256])
