In [1]:
# Dataset source: https://www.kaggle.com/datasets/tombackert/brain-tumor-mri-data?select=brain-tumor-mri-dataset

import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torch import nn
import os
import pathlib
# from skimage.io import imread
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Root directory of the image dataset. It is a relative path, so it is
# resolved against the notebook's current working directory.
PATH_TO_DATA = "brain-tumor-mri-dataset"

In [3]:
class BrainTumorDataset(Dataset):
    """Grayscale image dataset built from every file found under a directory tree.

    The class label is parsed from the file name, not from the folder: the
    second-to-last ``-``-separated token of the base name must be one of the
    keys of ``label_to_int_map`` ('gl', 'me', 'no', 'pi').

    Args:
        path_to_data: Root directory that is walked recursively for files.
        transform: Optional callable applied to the PIL image before it is
            returned.
    """

    def __init__(self, path_to_data, transform=None):
        # Walk the directory handed in by the caller, not a module-level global,
        # so the dataset can be pointed at any folder.
        found = []
        for root, _dirs, files in os.walk(path_to_data):
            found.extend(os.path.join(root, name) for name in files)
        # os.walk order depends on the file system; sorting keeps sample indices
        # (and therefore any seeded random_split) stable across runs/machines.
        self.paths = sorted(found)
        self.transform = transform
        self.label_to_int_map = {'gl': 0, 'me': 1, 'no': 2, 'pi': 3}

    def __len__(self):
        """Return the number of files discovered under the root directory."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``.

        The image is converted to single-channel mode 'L' and passed through
        ``transform`` when one was given; the label is a tensor holding the
        integer class id.

        Raises:
            IndexError: If the file name has fewer than two ``-``-separated tokens.
            KeyError: If the parsed token is not a key of ``label_to_int_map``.
        """
        img_path = self.paths[idx]
        # The context manager releases the file handle once the pixels are decoded.
        with Image.open(img_path) as raw:
            image = raw.convert('L')
        label = os.path.basename(img_path).split("-")[-2]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(self.label_to_int_map[label])

In [4]:
# Preprocessing applied to every image: resize to a fixed 256x256, then
# convert the PIL image to a tensor.
preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [5]:
# Full dataset; each image goes through `transform` when it is accessed.
bt_data = BrainTumorDataset(path_to_data=PATH_TO_DATA, transform=transform)

In [6]:
# Rule of thumb for choosing a splitting method:
# - torch.utils.data.random_split: simplest option; fine when the classes are roughly balanced.
# - scikit-learn's train_test_split: use when the classes are imbalanced or a stratified split is needed.

# Seeded generator so the split is repeatable for a given dataset ordering.
gen = torch.Generator().manual_seed(42)

# 60% train / 20% validation; the test split takes whatever is left so the
# three sizes always add up to the dataset length.
n_samples = len(bt_data)
train_size = int(0.6 * n_samples)
val_size = int(0.2 * n_samples)
test_size = n_samples - (train_size + val_size)

split_sizes = [train_size, val_size, test_size]
train_dataset, val_dataset, test_dataset = random_split(bt_data, split_sizes, generator=gen)

# Only the training loader shuffles; validation and test keep a fixed order.
loader_kwargs = {"batch_size": 32}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [7]:
class BrainTumorCNN(nn.Module):
    """Small LeNet-style CNN: two conv(5x5) + ReLU + 2x2 max-pool stages
    followed by three fully connected layers.

    Args:
        num_classes: Size of the output layer (number of logits). Default 4.
        in_channels: Channels of the input image. Default 1.
        image_size: Side length of the (square) input image. It determines the
            input width of the first fully connected layer. Default 256,
            which gives 16 * 61 * 61 features.

    Raises:
        ValueError: If ``image_size`` is too small to survive both
            conv + pool stages.
    """

    def __init__(self, num_classes=4, in_channels=1, image_size=256):
        super().__init__()
        side = self._feature_side(image_size)
        if side < 1:
            raise ValueError(
                f"image_size={image_size} is too small for two 5x5 conv + 2x2 pool stages"
            )
        # Layers are created in a fixed order so seeded weight initialization
        # and state_dict keys stay stable.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv1 = nn.Conv2d(in_channels, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(16 * side * side, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    @staticmethod
    def _feature_side(image_size):
        """Side length of the feature map after both conv + pool stages."""
        side = image_size
        for _ in range(2):
            # An unpadded 5x5 conv removes 4 pixels; 2x2 pooling halves (floor).
            side = (side - 4) // 2
        return side

    def forward(self, x):
        """Map a batch of images to raw class logits (no softmax applied)."""
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        x = self.flatten(x)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        return self.fc3(x)


In [8]:
# Network, loss and optimizer used by the training loop.
model = BrainTumorCNN()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [9]:
def save_checkpoint(model, optimizer, epoch, loss, path):
    """Write a training checkpoint to ``path`` using ``torch.save``.

    The saved dict stores ``epoch`` and ``loss`` as given, plus the
    ``state_dict`` of the model and of the optimizer, under the keys
    'epoch', 'model_state_dict', 'optimizer_state_dict' and 'loss'.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=loss,
    )
    torch.save(checkpoint, path)

def load_checkpoint(model, optimizer, path, map_location=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        model: Module whose ``load_state_dict`` receives 'model_state_dict'.
        optimizer: Optimizer whose ``load_state_dict`` receives
            'optimizer_state_dict'.
        path: Checkpoint file containing the keys 'epoch', 'model_state_dict',
            'optimizer_state_dict' and 'loss'.
        map_location: Forwarded to ``torch.load``; pass e.g. ``"cpu"`` to open
            a checkpoint saved on a GPU on a machine without one. The default
            ``None`` keeps ``torch.load``'s own behavior.

    Returns:
        ``(model, optimizer, epoch, loss)`` — the same model and optimizer
        objects (updated in place) plus the stored epoch and loss.
    """
    # NOTE: torch.load is pickle-based; only open checkpoints from trusted sources.
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, optimizer, checkpoint['epoch'], checkpoint['loss']


In [10]:
# Train the model
num_epochs = 10
best_loss = float('inf')

for epoch in range(num_epochs):
    # Re-enter training mode at the start of every epoch: the validation phase
    # below switches the model to eval mode, which would otherwise persist for
    # all following epochs (this matters once dropout/batch-norm layers are used).
    model.train()
    train_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    # Mean of the per-batch losses.
    train_loss /= len(train_loader)

    # Validation pass: eval mode and no gradient tracking.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
    val_loss /= len(val_loader)

    # Keep only the checkpoint with the lowest validation loss seen so far.
    if val_loss < best_loss:
        best_loss = val_loss
        save_checkpoint(model, optimizer, epoch, best_loss, "best_model.tar")

    print(f'Epoch {epoch+1}, Validation Loss: {val_loss}')
    print(f'Epoch {epoch+1}, Training Loss: {train_loss}')


Epoch 1, Validation Loss: 0.520189705159929
Epoch 1, Training Loss: 0.7773304696436282
Epoch 2, Validation Loss: 0.38868822322951424
Epoch 2, Training Loss: 0.43386292347201594
Epoch 3, Validation Loss: 0.3675208863284853
Epoch 3, Training Loss: 0.29269141874931476
Epoch 4, Validation Loss: 0.34816826813750795
Epoch 4, Training Loss: 0.2004508423584479
Epoch 5, Validation Loss: 0.36137537890010407
Epoch 5, Training Loss: 0.134635661073305
Epoch 6, Validation Loss: 0.2999038034015232
Epoch 6, Training Loss: 0.111240846015237
Epoch 7, Validation Loss: 0.3745493582967255
Epoch 7, Training Loss: 0.05773407040612289
Epoch 8, Validation Loss: 0.35033918155564203
Epoch 8, Training Loss: 0.058982867906215014
Epoch 9, Validation Loss: 0.29538519328667057
Epoch 9, Training Loss: 0.025401497599927502
Epoch 10, Validation Loss: 0.36804674054599473
Epoch 10, Training Loss: 0.013252798399318838


In [11]:
# Evaluate the model
# Restore the state stored in "best_model.tar" before scoring on the test split.
model, optimizer, epoch, loss = load_checkpoint(model, optimizer, "best_model.tar")
model.eval()

correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f'Test Accuracy: {100 * accuracy}%')

Test Accuracy: 92.38826815642457%


In [12]:
# Report whether a CUDA device is visible to PyTorch.
gpu_status = "GPU is available" if torch.cuda.is_available() else "GPU is not available"
print(gpu_status)

GPU is not available
