In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("tinashri/brain-tumor-dataset-includes-the-mask-and-images")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/brain-tumor-dataset-includes-the-mask-and-images


In [2]:
import os

dataset_path = "/kaggle/input/brain-tumor-dataset-includes-the-mask-and-images/data/data"
print("Images folder exists:", os.path.exists(os.path.join(dataset_path, "images")))
print("Masks folder exists:", os.path.exists(os.path.join(dataset_path, "masks")))


Images folder exists: True
Masks folder exists: True


In [3]:
from glob import glob

images = glob(f"{dataset_path}/images/*")
masks = glob(f"{dataset_path}/masks/*")

print(f"Found {len(images)} images and {len(masks)} masks")


Found 3064 images and 3064 masks


In [4]:
import os
import numpy as np
import torch
import cv2
from glob import glob
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms

# Set image dimensions
H, W = 256, 256

# Define dataset class
class BrainTumorDataset(Dataset):
    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]

        # Load image and mask
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # Resize to fixed dimensions
        image = cv2.resize(image, (H, W))
        mask = cv2.resize(mask, (H, W))

        # Normalize image (scale pixel values to [0,1])
        image = image.astype(np.float32) / 255.0
        mask = mask.astype(np.float32) / 255.0

        # Convert to tensors
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)  # (C, H, W)
        mask = torch.tensor(mask, dtype=torch.float32).unsqueeze(0)  # (1, H, W)

        if self.transform:
            image = self.transform(image)

        return image, mask

# Load dataset paths
def load_dataset(path, split=0.1):
    image_dir = os.path.join(path, "images")
    mask_dir = os.path.join(path, "masks")

    image_paths = sorted(glob(f"{image_dir}/*"))
    mask_paths = sorted(glob(f"{mask_dir}/*"))

    if len(image_paths) == 0 or len(mask_paths) == 0:
        raise ValueError(f"No images or masks found in {path}. Check dataset structure.")

    # Split data
    total_size = len(image_paths)
    test_size = int(total_size * split)
    valid_size = int(total_size * split)
    train_size = total_size - (test_size + valid_size)

    dataset = BrainTumorDataset(image_paths, mask_paths)
    train_dataset, valid_dataset, test_dataset = random_split(dataset, [train_size, valid_size, test_size])

    return train_dataset, valid_dataset, test_dataset

# Create DataLoader
def create_dataloaders(dataset_path, batch_size=16):
    train_dataset, valid_dataset, test_dataset = load_dataset(dataset_path)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, valid_loader, test_loader

# Main script
if __name__ == "__main__":
    dataset_path = "/kaggle/input/brain-tumor-dataset-includes-the-mask-and-images/data/data"

    # Create DataLoaders
    train_loader, valid_loader, test_loader = create_dataloaders(dataset_path, batch_size=16)

    # Print dataset sizes
    print(f"Train: {len(train_loader.dataset)} images")
    print(f"Validation: {len(valid_loader.dataset)} images")
    print(f"Test: {len(test_loader.dataset)} images")

    # Get a batch of images and masks
    sample_images, sample_masks = next(iter(train_loader))
    print(f"Sample batch shape - Images: {sample_images.shape}, Masks: {sample_masks.shape}")


Train: 2452 images
Validation: 306 images
Test: 306 images
Sample batch shape - Images: torch.Size([16, 3, 256, 256]), Masks: torch.Size([16, 1, 256, 256])


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define U-Net Model
class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()

        # Encoder (Downsampling)
        self.enc1 = self.conv_block(3, 64)
        self.enc2 = self.conv_block(64, 128)
        self.enc3 = self.conv_block(128, 256)
        self.enc4 = self.conv_block(256, 512)

        # Bottleneck
        self.bottleneck = self.conv_block(512, 1024)

        # Decoder (Upsampling)
        self.upconv4 = self.upconv(1024, 512)
        self.dec4 = self.conv_block(1024, 512)

        self.upconv3 = self.upconv(512, 256)
        self.dec3 = self.conv_block(512, 256)

        self.upconv2 = self.upconv(256, 128)
        self.dec2 = self.conv_block(256, 128)

        self.upconv1 = self.upconv(128, 64)
        self.dec1 = self.conv_block(128, 64)

        # Final Convolution Layer
        self.final_conv = nn.Conv2d(64, 1, kernel_size=1)

    def conv_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def upconv(self, in_channels, out_channels):
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)

    def forward(self, x):
        # Encoder
        enc1 = self.enc1(x)
        enc2 = self.enc2(nn.MaxPool2d(2)(enc1))
        enc3 = self.enc3(nn.MaxPool2d(2)(enc2))
        enc4 = self.enc4(nn.MaxPool2d(2)(enc3))

        # Bottleneck
        bottleneck = self.bottleneck(nn.MaxPool2d(2)(enc4))

        # Decoder
        up4 = self.upconv4(bottleneck)
        up4 = torch.cat([up4, enc4], dim=1)
        dec4 = self.dec4(up4)

        up3 = self.upconv3(dec4)
        up3 = torch.cat([up3, enc3], dim=1)
        dec3 = self.dec3(up3)

        up2 = self.upconv2(dec3)
        up2 = torch.cat([up2, enc2], dim=1)
        dec2 = self.dec2(up2)

        up1 = self.upconv1(dec2)
        up1 = torch.cat([up1, enc1], dim=1)
        dec1 = self.dec1(up1)

        return torch.sigmoid(self.final_conv(dec1))  # Sigmoid for binary segmentation

# Instantiate Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)


In [6]:
# Dice Coefficient Metric
def dice_coef(y_true, y_pred, smooth=1e-6):
    y_true = y_true.view(-1)
    y_pred = y_pred.view(-1)
    intersection = (y_true * y_pred).sum()
    return (2. * intersection + smooth) / (y_true.sum() + y_pred.sum() + smooth)

# Dice Loss
class DiceLoss(nn.Module):
    def __init__(self):
        super(DiceLoss, self).__init__()

    def forward(self, y_pred, y_true):
        return 1 - dice_coef(y_true, y_pred)

# Define Loss & Optimizer
criterion = nn.BCEWithLogitsLoss()
dice_loss = DiceLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


In [7]:
from tqdm import tqdm

# Train function
def train(model, train_loader, optimizer, criterion, dice_loss, device):
    model.train()
    total_loss, total_dice = 0, 0

    for images, masks in tqdm(train_loader):
        images, masks = images.to(device), masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        bce = criterion(outputs, masks)
        dice = dice_loss(outputs, masks)
        loss = bce + dice

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_dice += dice_coef(masks, outputs).item()

    return total_loss / len(train_loader), total_dice / len(train_loader)

# Validation function
def validate(model, valid_loader, criterion, dice_loss, device):
    model.eval()
    total_loss, total_dice = 0, 0

    with torch.no_grad():
        for images, masks in tqdm(valid_loader):
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)

            bce = criterion(outputs, masks)
            dice = dice_loss(outputs, masks)
            loss = bce + dice

            total_loss += loss.item()
            total_dice += dice_coef(masks, outputs).item()

    return total_loss / len(valid_loader), total_dice / len(valid_loader)


In [8]:
# Load Data
train_loader, valid_loader, test_loader = create_dataloaders("/kaggle/input/brain-tumor-dataset-includes-the-mask-and-images/data/data", batch_size=16)

# Training Loop
num_epochs = 20
best_valid_loss = float("inf")

for epoch in range(num_epochs):
    train_loss, train_dice = train(model, train_loader, optimizer, criterion, dice_loss, device)
    valid_loss, valid_dice = validate(model, valid_loader, criterion, dice_loss, device)

    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Train Dice: {train_dice:.4f} - Valid Loss: {valid_loss:.4f} - Valid Dice: {valid_dice:.4f}")

    # Save Best Model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), "best_unet.pth")
        print("Model Saved!")


100%|██████████| 154/154 [02:59<00:00,  1.17s/it]
100%|██████████| 20/20 [00:13<00:00,  1.52it/s]


Epoch 1/20 - Train Loss: 1.7322 - Train Dice: 0.0931 - Valid Loss: 1.6815 - Valid Dice: 0.1192
Model Saved!


100%|██████████| 154/154 [02:25<00:00,  1.06it/s]
100%|██████████| 20/20 [00:07<00:00,  2.67it/s]


Epoch 2/20 - Train Loss: 1.6434 - Train Dice: 0.1358 - Valid Loss: 1.6134 - Valid Dice: 0.1547
Model Saved!


100%|██████████| 154/154 [02:29<00:00,  1.03it/s]
100%|██████████| 20/20 [00:07<00:00,  2.65it/s]


Epoch 3/20 - Train Loss: 1.5681 - Train Dice: 0.1850 - Valid Loss: 1.5390 - Valid Dice: 0.2094
Model Saved!


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.59it/s]


Epoch 4/20 - Train Loss: 1.4870 - Train Dice: 0.2461 - Valid Loss: 1.4411 - Valid Dice: 0.2829
Model Saved!


100%|██████████| 154/154 [02:31<00:00,  1.02it/s]
100%|██████████| 20/20 [00:07<00:00,  2.53it/s]


Epoch 5/20 - Train Loss: 1.3915 - Train Dice: 0.3256 - Valid Loss: 1.3440 - Valid Dice: 0.3657
Model Saved!


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.63it/s]


Epoch 6/20 - Train Loss: 1.2859 - Train Dice: 0.4208 - Valid Loss: 1.2491 - Valid Dice: 0.4545
Model Saved!


100%|██████████| 154/154 [02:36<00:00,  1.02s/it]
100%|██████████| 20/20 [00:08<00:00,  2.31it/s]


Epoch 7/20 - Train Loss: 1.1803 - Train Dice: 0.5195 - Valid Loss: 1.1329 - Valid Dice: 0.5646
Model Saved!


100%|██████████| 154/154 [02:38<00:00,  1.03s/it]
100%|██████████| 20/20 [00:07<00:00,  2.62it/s]


Epoch 8/20 - Train Loss: 1.1002 - Train Dice: 0.5959 - Valid Loss: 1.0570 - Valid Dice: 0.6379
Model Saved!


100%|██████████| 154/154 [02:33<00:00,  1.00it/s]
100%|██████████| 20/20 [00:08<00:00,  2.40it/s]


Epoch 9/20 - Train Loss: 1.0352 - Train Dice: 0.6586 - Valid Loss: 1.0164 - Valid Dice: 0.6761
Model Saved!


100%|██████████| 154/154 [02:35<00:00,  1.01s/it]
100%|██████████| 20/20 [00:07<00:00,  2.55it/s]


Epoch 10/20 - Train Loss: 0.9895 - Train Dice: 0.7030 - Valid Loss: 1.0017 - Valid Dice: 0.6902
Model Saved!


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.59it/s]


Epoch 11/20 - Train Loss: 0.9503 - Train Dice: 0.7411 - Valid Loss: 0.9879 - Valid Dice: 0.7033
Model Saved!


100%|██████████| 154/154 [02:31<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.63it/s]


Epoch 12/20 - Train Loss: 0.9134 - Train Dice: 0.7773 - Valid Loss: 0.9365 - Valid Dice: 0.7544
Model Saved!


100%|██████████| 154/154 [02:33<00:00,  1.00it/s]
100%|██████████| 20/20 [00:10<00:00,  1.95it/s]


Epoch 13/20 - Train Loss: 0.8967 - Train Dice: 0.7936 - Valid Loss: 0.9362 - Valid Dice: 0.7542
Model Saved!


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.60it/s]


Epoch 14/20 - Train Loss: 0.8837 - Train Dice: 0.8064 - Valid Loss: 0.9091 - Valid Dice: 0.7810
Model Saved!


100%|██████████| 154/154 [02:31<00:00,  1.02it/s]
100%|██████████| 20/20 [00:07<00:00,  2.61it/s]


Epoch 15/20 - Train Loss: 0.8609 - Train Dice: 0.8288 - Valid Loss: 0.8947 - Valid Dice: 0.7953
Model Saved!


100%|██████████| 154/154 [02:31<00:00,  1.02it/s]
100%|██████████| 20/20 [00:07<00:00,  2.63it/s]


Epoch 16/20 - Train Loss: 0.8515 - Train Dice: 0.8380 - Valid Loss: 0.8911 - Valid Dice: 0.7986
Model Saved!


100%|██████████| 154/154 [02:31<00:00,  1.02it/s]
100%|██████████| 20/20 [00:07<00:00,  2.61it/s]


Epoch 17/20 - Train Loss: 0.8426 - Train Dice: 0.8467 - Valid Loss: 0.9259 - Valid Dice: 0.7639


100%|██████████| 154/154 [02:34<00:00,  1.01s/it]
100%|██████████| 20/20 [00:07<00:00,  2.61it/s]


Epoch 18/20 - Train Loss: 0.8358 - Train Dice: 0.8534 - Valid Loss: 0.9113 - Valid Dice: 0.7784


100%|██████████| 154/154 [02:32<00:00,  1.01it/s]
100%|██████████| 20/20 [00:07<00:00,  2.52it/s]


Epoch 19/20 - Train Loss: 0.8257 - Train Dice: 0.8632 - Valid Loss: 0.8684 - Valid Dice: 0.8210
Model Saved!


100%|██████████| 154/154 [02:36<00:00,  1.02s/it]
100%|██████████| 20/20 [00:07<00:00,  2.59it/s]

Epoch 20/20 - Train Loss: 0.8203 - Train Dice: 0.8686 - Valid Loss: 0.9151 - Valid Dice: 0.7747





In [9]:
# Load Best Model
model.load_state_dict(torch.load("best_unet.pth"))

# Evaluate on Test Data
test_loss, test_dice = validate(model, test_loader, criterion, dice_loss, device)
print(f"Test Loss: {test_loss:.4f} - Test Dice Score: {test_dice:.4f}")

  model.load_state_dict(torch.load("best_unet.pth"))
100%|██████████| 20/20 [00:14<00:00,  1.42it/s]

Test Loss: 0.9056 - Test Dice Score: 0.7844



