In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        (os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
pip install torch torchvision albumentations==1.1.0 opencv-python numpy scikit-learn tqdm


Collecting albumentations==1.1.0
  Downloading albumentations-1.1.0-py3-none-any.whl.metadata (31 kB)
Collecting qudida>=0.0.4 (from albumentations==1.1.0)
  Downloading qudida-0.0.4-py3-none-any.whl.metadata (1.5 kB)
Downloading albumentations-1.1.0-py3-none-any.whl (102 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.4/102.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Installing collected packages: qudida, albumentations
  Attempting uninstall: albumentations
    Found existing installation: albumentations 2.0.8
    Uninstalling albumentations-2.0.8:
      Successfully uninstalled albumentations-2.0.8
Successfully installed albumentations-1.1.0 qudida-0.0.4
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.model_selection import train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm


In [4]:
IMAGE_DIR = "/kaggle/input/chest-x-ray-lungs-segmentation/Chest-X-Ray/Chest-X-Ray/image"
MASK_DIR  = "/kaggle/input/chest-x-ray-lungs-segmentation/Chest-X-Ray/Chest-X-Ray/mask"


In [5]:
train_transform = A.Compose([
    A.Resize(256,256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5)),
    ToTensorV2()
])

val_test_transform = A.Compose([
    A.Resize(256,256),
    A.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5)),
    ToTensorV2()
])


In [6]:
class XRayLungDataset(Dataset):
    def __init__(self, images, masks, transform=None):
        self.images = images
        self.masks  = masks
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img  = cv2.imread(self.images[idx], cv2.IMREAD_GRAYSCALE)
        mask = cv2.imread(self.masks[idx],  cv2.IMREAD_GRAYSCALE)

        # Convert to 3-channel for ResNet
        img = np.stack([img,img,img], axis=-1)

        # Normalize and convert to float32
        img  = (img / 255.0).astype(np.float32)
        mask = (mask / 255.0).astype(np.float32)

        if self.transform:
            augmented = self.transform(image=img, mask=mask)
            img  = augmented["image"]
            mask = augmented["mask"]

        return img, mask.unsqueeze(0)


In [7]:
# class XRayLungDataset(Dataset):
#     def __init__(self, images, masks, transform=None):
#         self.images = images
#         self.masks = masks
#         self.transform = transform

#     def __len__(self):
#         return len(self.images)

#     def __getitem__(self, idx):
#         img = cv2.imread(self.images[idx], cv2.IMREAD_GRAYSCALE)
#         mask= cv2.imread(self.masks[idx], cv2.IMREAD_GRAYSCALE)

#         # convert to 3 channels for pretrained ResNet
#         img = np.stack([img, img, img], axis=-1)

#         img = img / 255.0
#         mask= mask / 255.0

#         if self.transform:
#             augmented = self.transform(image=img, mask=mask)
#             img  = augmented["image"]
#             mask = augmented["mask"]

#         return img, mask.unsqueeze(0)


In [8]:
all_images = sorted([os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR)])
all_masks  = sorted([os.path.join(MASK_DIR, f) for f in os.listdir(MASK_DIR)])

X_train, X_temp, y_train, y_temp = train_test_split(all_images, all_masks, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test   = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [9]:
train_loader = DataLoader(XRayLungDataset(X_train, y_train, train_transform), batch_size=8, shuffle=True)
val_loader   = DataLoader(XRayLungDataset(X_val,   y_val,   val_test_transform), batch_size=8, shuffle=False)
test_loader  = DataLoader(XRayLungDataset(X_test,  y_test,  val_test_transform), batch_size=1, shuffle=False)


In [10]:
class AttentionBlock(nn.Module):
    def __init__(self, F_g, F_l, F_int):
        super().__init__()
        self.W_g = nn.Conv2d(F_g, F_int, 1)
        self.W_x = nn.Conv2d(F_l, F_int, 1)
        self.psi = nn.Sequential(nn.Conv2d(F_int, 1, 1), nn.Sigmoid())
        self.relu= nn.ReLU(inplace=True)

    def forward(self, g, x):
        psi = self.relu(self.W_g(g) + self.W_x(x))
        psi = self.psi(psi)
        return x * psi


In [11]:
class ResNet50_Attention_UNet(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = resnet50(weights=ResNet50_Weights.DEFAULT)

        self.e1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu)
        self.e2 = resnet.layer1
        self.e3 = resnet.layer2
        self.e4 = resnet.layer3
        self.e5 = resnet.layer4

        self.up4 = nn.ConvTranspose2d(2048, 1024, 2, 2)
        self.att4= AttentionBlock(1024,1024,512)
        self.d4  = nn.Conv2d(2048,1024,3,padding=1)
        self.up3 = nn.ConvTranspose2d(1024, 512, 2,2)
        self.att3= AttentionBlock(512,512,256)
        self.d3  = nn.Conv2d(1024,512,3,padding=1)
        self.up2 = nn.ConvTranspose2d(512,256,2,2)
        self.att2= AttentionBlock(256,256,128)
        self.d2  = nn.Conv2d(512,256,3,padding=1)
        self.up1 = nn.ConvTranspose2d(256, 64,2,2)
        self.d1  = nn.Conv2d(128,64,3,padding=1)
        self.out = nn.Conv2d(64,1,1)

    def forward(self,x):
        e1 = self.e1(x)
        e2 = self.e2(e1)
        e3 = self.e3(e2)
        e4 = self.e4(e3)
        e5 = self.e5(e4)

        d4 = self.up4(e5)
        e4 = self.att4(d4, e4)
        d4 = self.d4(torch.cat([d4,e4],1))

        d3 = self.up3(d4)
        e3 = self.att3(d3, e3)
        d3 = self.d3(torch.cat([d3,e3],1))

        d2 = self.up2(d3)
        e2 = self.att2(d2, e2)
        d2 = self.d2(torch.cat([d2,e2],1))

        d1 = self.up1(d2)
        if d1.shape[2:] != e1.shape[2:]:
            e1 = nn.functional.interpolate(e1, size=d1.shape[2:], mode="bilinear", align_corners=False)
        d1 = self.d1(torch.cat([d1,e1],1))

        return torch.sigmoid(self.out(d1))


In [12]:
def dice_score(pred, target):
    smooth=1e-6
    pred=pred.view(-1)
    target=target.view(-1)
    return (2*(pred*target).sum()+smooth)/((pred+target).sum()+smooth)

def iou_score(pred,target):
    smooth=1e-6
    pred=pred.view(-1); target=target.view(-1)
    inter=(pred*target).sum()
    union=pred.sum()+target.sum()-inter
    return (inter+smooth)/(union+smooth)


In [13]:
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = ResNet50_Attention_UNet().to(device)
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-4)

# EPOCHS = 20

# for epoch in range(EPOCHS):
#     model.train()
#     train_loss = 0

#     for imgs, masks in tqdm(train_loader):
#         # 1️⃣ Move tensors to device (GPU or CPU)
#         imgs, masks = imgs.to(device), masks.to(device)

#         # 2️⃣ Convert masks to float32 to match BCELoss
#         masks = masks.float()  # <-- THIS IS THE FIX

#         # 3️⃣ Forward pass
#         preds = model(imgs)

#         # 4️⃣ Compute loss
#         loss = criterion(preds, masks)

#         # 5️⃣ Backpropagation
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()

#         train_loss += loss.item()

#     print(f"Epoch [{epoch+1}/{EPOCHS}] - Train Loss: {train_loss/len(train_loader):.4f}")


In [14]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ResNet50_Attention_UNet().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

EPOCHS = 10

def dice_score(pred, target):
    smooth = 1e-6
    pred = pred.view(-1)
    target = target.view(-1)
    return (2*(pred*target).sum()+smooth)/((pred+target).sum()+smooth)

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0
    train_dice = 0

    for imgs, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Training"):
        imgs, masks = imgs.to(device), masks.to(device)
        masks = masks.float()  # ensure BCELoss compatibility

        preds = model(imgs)
        loss = criterion(preds, masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_dice += dice_score(preds, masks).item()

    train_loss /= len(train_loader)
    train_dice /= len(train_loader)

    # -------------- Validation -----------------
    model.eval()
    val_loss = 0
    val_dice = 0

    with torch.no_grad():
        for imgs, masks in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} - Validation"):
            imgs, masks = imgs.to(device), masks.to(device)
            masks = masks.float()

            preds = model(imgs)
            loss = criterion(preds, masks)

            val_loss += loss.item()
            val_dice += dice_score(preds, masks).item()

    val_loss /= len(val_loader)
    val_dice /= len(val_loader)

    print(f"\nEpoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Dice: {train_dice:.4f} "
          f"| Val Loss: {val_loss:.4f} | Val Dice: {val_dice:.4f}\n")


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 206MB/s] 
Epoch 1/10 - Training: 100%|██████████| 62/62 [03:23<00:00,  3.29s/it]
Epoch 1/10 - Validation: 100%|██████████| 14/14 [00:37<00:00,  2.65s/it]



Epoch [1/10] Train Loss: 0.2046 | Train Dice: 0.7544 | Val Loss: 1.1064 | Val Dice: 0.0317



Epoch 2/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.09s/it]
Epoch 2/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.43s/it]



Epoch [2/10] Train Loss: 0.0736 | Train Dice: 0.9169 | Val Loss: 0.8693 | Val Dice: 0.0972



Epoch 3/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.10s/it]
Epoch 3/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.42s/it]



Epoch [3/10] Train Loss: 0.0606 | Train Dice: 0.9319 | Val Loss: 1.0401 | Val Dice: 0.2127



Epoch 4/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.09s/it]
Epoch 4/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]



Epoch [4/10] Train Loss: 0.0533 | Train Dice: 0.9395 | Val Loss: 1.7582 | Val Dice: 0.0027



Epoch 5/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.08s/it]
Epoch 5/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]



Epoch [5/10] Train Loss: 0.0505 | Train Dice: 0.9425 | Val Loss: 0.6023 | Val Dice: 0.3939



Epoch 6/10 - Training: 100%|██████████| 62/62 [03:10<00:00,  3.08s/it]
Epoch 6/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]



Epoch [6/10] Train Loss: 0.0501 | Train Dice: 0.9426 | Val Loss: 1.4725 | Val Dice: 0.0097



Epoch 7/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.09s/it]
Epoch 7/10 - Validation: 100%|██████████| 14/14 [00:34<00:00,  2.43s/it]



Epoch [7/10] Train Loss: 0.0512 | Train Dice: 0.9430 | Val Loss: 1.7631 | Val Dice: 0.0027



Epoch 8/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.09s/it]
Epoch 8/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]



Epoch [8/10] Train Loss: 0.0501 | Train Dice: 0.9429 | Val Loss: 0.3767 | Val Dice: 0.4823



Epoch 9/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.09s/it]
Epoch 9/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]



Epoch [9/10] Train Loss: 0.0448 | Train Dice: 0.9491 | Val Loss: 0.1651 | Val Dice: 0.8365



Epoch 10/10 - Training: 100%|██████████| 62/62 [03:11<00:00,  3.08s/it]
Epoch 10/10 - Validation: 100%|██████████| 14/14 [00:33<00:00,  2.41s/it]


Epoch [10/10] Train Loss: 0.0433 | Train Dice: 0.9495 | Val Loss: 0.5147 | Val Dice: 0.4232






In [15]:
# device="cuda" if torch.cuda.is_available() else "cpu"
# model=ResNet50_Attention_UNet().to(device)
# criterion=nn.BCELoss()
# optimizer=optim.Adam(model.parameters(), lr=1e-4)

# for epoch in range(20):
#     model.train()
#     loss_sum=0
#     for imgs, masks in tqdm(train_loader):
#         imgs, masks = imgs.to(device), masks.to(device)
#         preds = model(imgs)
#         loss  = criterion(preds, masks)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         loss_sum += loss.item()

#     print(f"Epoch {epoch+1} - Train Loss: {loss_sum/len(train_loader):.4f}")


In [16]:
model.eval()
dice, iou = 0,0
with torch.no_grad():
    for imgs, masks in test_loader:
        imgs, masks = imgs.to(device), masks.to(device)
        preds = model(imgs)
        dice += dice_score(preds, masks).item()
        iou  += iou_score(preds, masks).item()

print(f"Test Dice: {dice/len(test_loader):.4f}")
print(f"Test IoU : {iou/len(test_loader):.4f}")


Test Dice: 0.4361
Test IoU : 0.2823


In [17]:
import torch

def compute_metrics(pred, target, threshold=0.5):
    """
    pred : torch.Tensor (B,1,H,W) predicted mask probabilities
    target : torch.Tensor (B,1,H,W) ground truth
    threshold : probability threshold to convert to binary mask
    returns : dict of metrics (dice, iou, precision, recall, f1, specificity, accuracy)
    """
    pred_bin = (pred > threshold).float()
    target_bin = target.float()
    
    TP = (pred_bin * target_bin).sum()
    FP = ((pred_bin == 1) & (target_bin == 0)).sum()
    FN = ((pred_bin == 0) & (target_bin == 1)).sum()
    TN = ((pred_bin == 0) & (target_bin == 0)).sum()
    
    dice = (2*TP) / (2*TP + FP + FN + 1e-6)
    iou  = TP / (TP + FP + FN + 1e-6)
    precision = TP / (TP + FP + 1e-6)
    recall    = TP / (TP + FN + 1e-6)
    f1        = 2*precision*recall / (precision + recall + 1e-6)
    specificity = TN / (TN + FP + 1e-6)
    accuracy = (TP + TN) / (TP + TN + FP + FN + 1e-6)
    
    return {
        "dice": dice.item(),
        "iou": iou.item(),
        "precision": precision.item(),
        "recall": recall.item(),
        "f1": f1.item(),
        "specificity": specificity.item(),
        "accuracy": accuracy.item()
    }


In [None]:
import matplotlib.pyplot as plt
from collections import defaultdict

EPOCHS = 20

train_history = defaultdict(list)
val_history = defaultdict(list)

for epoch in range(EPOCHS):
    # ---------------- TRAIN -------------------
    model.train()
    train_loss_sum = 0
    train_metrics_sum = defaultdict(float)
    
    for imgs, masks in train_loader:
        imgs, masks = imgs.to(device), masks.to(device)
        masks = masks.float()
        
        preds = model(imgs)
        loss = criterion(preds, masks)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss_sum += loss.item()
        
        metrics = compute_metrics(preds, masks)
        for k in metrics:
            train_metrics_sum[k] += metrics[k]
    
    # Average metrics for the epoch
    n_train = len(train_loader)
    train_history["loss"].append(train_loss_sum / n_train)
    for k in train_metrics_sum:
        train_history[k].append(train_metrics_sum[k]/n_train)
    
    # ---------------- VALIDATION -------------------
    model.eval()
    val_loss_sum = 0
    val_metrics_sum = defaultdict(float)
    
    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs, masks = imgs.to(device), masks.to(device)
            masks = masks.float()
            
            preds = model(imgs)
            loss = criterion(preds, masks)
            val_loss_sum += loss.item()
            
            metrics = compute_metrics(preds, masks)
            for k in metrics:
                val_metrics_sum[k] += metrics[k]
    
    n_val = len(val_loader)
    val_history["loss"].append(val_loss_sum / n_val)
    for k in val_metrics_sum:
        val_history[k].append(val_metrics_sum[k]/n_val)
    
    # --------- Print Epoch Summary -------------
    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"Train Loss: {train_history['loss'][-1]:.4f} | Dice: {train_history['dice'][-1]:.4f} | IoU: {train_history['iou'][-1]:.4f}")
    print(f"Val   Loss: {val_history['loss'][-1]:.4f} | Dice: {val_history['dice'][-1]:.4f} | IoU: {val_history['iou'][-1]:.4f}\n")


Epoch 1/20
Train Loss: 0.0428 | Dice: 0.9660 | IoU: 0.9343
Val   Loss: 0.0987 | Dice: 0.9292 | IoU: 0.8680

Epoch 2/20
Train Loss: 0.0421 | Dice: 0.9668 | IoU: 0.9359
Val   Loss: 0.3581 | Dice: 0.5825 | IoU: 0.4137

Epoch 3/20
Train Loss: 0.0390 | Dice: 0.9685 | IoU: 0.9391
Val   Loss: 0.6071 | Dice: 0.3750 | IoU: 0.2431

Epoch 4/20
Train Loss: 0.0385 | Dice: 0.9691 | IoU: 0.9400
Val   Loss: 0.5784 | Dice: 0.2613 | IoU: 0.1542

Epoch 5/20
Train Loss: 0.0362 | Dice: 0.9704 | IoU: 0.9426
Val   Loss: 0.1210 | Dice: 0.9102 | IoU: 0.8357



In [None]:
plt.figure(figsize=(10,5))
plt.plot(train_history['loss'], label="Train Loss")
plt.plot(val_history['loss'], label="Val Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

model.eval()
with torch.no_grad():
    for imgs, masks in test_loader:
        imgs, masks = imgs.to(device), masks.to(device)
        masks = masks.float()
        
        preds = model(imgs)
        pred_mask = (preds > 0.5).float()
        
        img_np = imgs[0].cpu().permute(1,2,0).numpy()
        mask_np = masks[0,0].cpu().numpy()
        pred_np = pred_mask[0,0].cpu().numpy()
        
        plt.figure(figsize=(12,4))
        plt.subplot(1,3,1); plt.imshow(img_np, cmap='gray'); plt.title("X-ray Image")
        plt.subplot(1,3,2); plt.imshow(mask_np, cmap='gray'); plt.title("Ground Truth Mask")
        plt.subplot(1,3,3); plt.imshow(pred_np, cmap='gray'); plt.title("Predicted Mask")
        plt.show()
        break  # show one batch


In [None]:
from sklearn.model_selection import KFold
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

all_images = sorted([os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR)])
all_masks  = sorted([os.path.join(MASK_DIR, f) for f in os.listdir(MASK_DIR)])

fold = 1
fold_metrics = []

for train_idx, val_idx in kf.split(all_images):
    print(f"Fold {fold}/{k}")
    X_train = [all_images[i] for i in train_idx]
    y_train = [all_masks[i] for i in train_idx]
    X_val   = [all_images[i] for i in val_idx]
    y_val   = [all_masks[i] for i in val_idx]

    # Create loaders, model, optimizer (same as above)
    # Train model (same training loop)
    # Compute metrics on validation set
    # Save mean Dice / IoU for this fold
    fold_metrics.append(val_dice)  # or dict with all metrics
    fold += 1

mean_dice = np.mean(fold_metrics)
std_dice  = np.std(fold_metrics)
print(f"5-Fold CV Dice Mean ± Std: {mean_dice:.4f} ± {std_dice:.4f}")


In [None]:
from scipy.stats import wilcoxon

# Suppose dice_model1 and dice_model2 are lists of fold dice scores
stat, p = wilcoxon(dice_model1, dice_model2)
print(f"Wilcoxon test p-value: {p:.4f}")
