In [13]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        (os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [14]:
pip install torch torchvision albumentations opencv-python numpy scikit-learn tqdm


Note: you may need to restart the kernel to use updated packages.


In [15]:
import os
import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm


In [16]:
IMAGE_DIR = "/kaggle/input/chest-x-ray-lungs-segmentation/Chest-X-Ray/Chest-X-Ray/image"
MASK_DIR  = "/kaggle/input/chest-x-ray-lungs-segmentation/Chest-X-Ray/Chest-X-Ray/mask"


In [17]:
train_transform = A.Compose([
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])

val_test_transform = A.Compose([
    A.Resize(256, 256),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])


In [18]:
class XRaySegmentationDataset(Dataset):
    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = cv2.imread(self.image_paths[idx], cv2.IMREAD_GRAYSCALE)
        mask  = cv2.imread(self.mask_paths[idx],  cv2.IMREAD_GRAYSCALE)

        image = np.stack([image, image, image], axis=-1)  # âœ… FIX
        image = image / 255.0
        mask  = mask / 255.0

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented["image"]
            mask  = augmented["mask"]

        return image, mask.unsqueeze(0)


In [19]:
# class XRaySegmentationDataset(Dataset):
#     def __init__(self, image_paths, mask_paths, transform=None):
#         self.image_paths = image_paths
#         self.mask_paths = mask_paths
#         self.transform = transform

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, idx):
#         image = cv2.imread(self.image_paths[idx], cv2.IMREAD_GRAYSCALE)
#         mask  = cv2.imread(self.mask_paths[idx],  cv2.IMREAD_GRAYSCALE)

#         image = image / 255.0
#         mask  = mask / 255.0

#         if self.transform:
#             augmented = self.transform(image=image, mask=mask)
#             image = augmented["image"]
#             mask  = augmented["mask"]

#         return image, mask.unsqueeze(0)


In [20]:
images = sorted([os.path.join(IMAGE_DIR, x) for x in os.listdir(IMAGE_DIR)])
masks  = sorted([os.path.join(MASK_DIR, x)  for x in os.listdir(MASK_DIR)])

X_train, X_temp, y_train, y_temp = train_test_split(
    images, masks, test_size=0.3, random_state=42
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)


In [21]:
train_loader = DataLoader(
    XRaySegmentationDataset(X_train, y_train, train_transform),
    batch_size=4, shuffle=True
)

val_loader = DataLoader(
    XRaySegmentationDataset(X_val, y_val, val_test_transform),
    batch_size=4, shuffle=False
)

test_loader = DataLoader(
    XRaySegmentationDataset(X_test, y_test, val_test_transform),
    batch_size=1, shuffle=False
)


In [22]:
class AttentionBlock(nn.Module):
    def __init__(self, F_g, F_l, F_int):
        super().__init__()
        self.W_g = nn.Conv2d(F_g, F_int, 1)
        self.W_x = nn.Conv2d(F_l, F_int, 1)
        self.psi = nn.Sequential(
            nn.Conv2d(F_int, 1, 1),
            nn.Sigmoid()
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        psi = self.relu(self.W_g(g) + self.W_x(x))
        psi = self.psi(psi)
        return x * psi


In [23]:
class ResNet50_Attention_UNet(nn.Module):
    def __init__(self):
        super().__init__()
        resnet = resnet50(pretrained=True)

        self.e1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu)
        self.e2 = resnet.layer1
        self.e3 = resnet.layer2
        self.e4 = resnet.layer3
        self.e5 = resnet.layer4

        self.up4 = nn.ConvTranspose2d(2048, 1024, 2, 2)
        self.att4 = AttentionBlock(1024, 1024, 512)
        self.d4 = nn.Conv2d(2048, 1024, 3, padding=1)

        self.up3 = nn.ConvTranspose2d(1024, 512, 2, 2)
        self.att3 = AttentionBlock(512, 512, 256)
        self.d3 = nn.Conv2d(1024, 512, 3, padding=1)

        self.up2 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.att2 = AttentionBlock(256, 256, 128)
        self.d2 = nn.Conv2d(512, 256, 3, padding=1)

        self.up1 = nn.ConvTranspose2d(256, 64, 2, 2)
        self.d1 = nn.Conv2d(128, 64, 3, padding=1)

        self.out = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        e1 = self.e1(x)
        e2 = self.e2(e1)
        e3 = self.e3(e2)
        e4 = self.e4(e3)
        e5 = self.e5(e4)

        d4 = self.up4(e5)
        e4 = self.att4(d4, e4)
        d4 = self.d4(torch.cat([d4, e4], 1))

        d3 = self.up3(d4)
        e3 = self.att3(d3, e3)
        d3 = self.d3(torch.cat([d3, e3], 1))

        d2 = self.up2(d3)
        e2 = self.att2(d2, e2)
        d2 = self.d2(torch.cat([d2, e2], 1))

        d1 = self.up1(d2)
        d1 = self.d1(torch.cat([d1, e1], 1))

        return torch.sigmoid(self.out(d1))


In [24]:
def dice_score(pred, target):
    smooth = 1e-6
    pred = pred.view(-1)
    target = target.view(-1)
    return (2*(pred*target).sum()+smooth)/((pred+target).sum()+smooth)

def iou_score(pred, target):
    smooth = 1e-6
    pred = pred.view(-1)
    target = target.view(-1)
    inter = (pred * target).sum()
    union = pred.sum() + target.sum() - inter
    return (inter + smooth) / (union + smooth)


In [25]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = ResNet50_Attention_UNet().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

EPOCHS = 20

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0

    for img, mask in tqdm(train_loader):
        img, mask = img.to(device), mask.to(device)

        pred = model(img)
        loss = criterion(pred, mask)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"Epoch [{epoch+1}/{EPOCHS}] Train Loss: {train_loss/len(train_loader):.4f}")


  0%|          | 0/123 [00:01<?, ?it/s]


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 256 but got size 128 for tensor number 1 in the list.

In [None]:
model.eval()
dice, iou = 0, 0

with torch.no_grad():
    for img, mask in test_loader:
        img, mask = img.to(device), mask.to(device)
        pred = model(img)

        dice += dice_score(pred, mask).item()
        iou  += iou_score(pred, mask).item()

print(f"Final Dice Score: {dice/len(test_loader):.4f}")
print(f"Final IoU Score : {iou/len(test_loader):.4f}")
