In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/masoudnickparvar/brain-tumor-mri-dataset?dataset_version_number=1...


100%|██████████| 149M/149M [00:00<00:00, 224MB/s] 

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/masoudnickparvar/brain-tumor-mri-dataset/versions/1


In [None]:
# utils
import os
import pandas as pd
from PIL import Image
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
# torch
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision import models

In [36]:
base_directory = '/root/.cache/kagglehub/datasets/masoudnickparvar/brain-tumor-mri-dataset/versions/1'
train, test = 'Training', 'Testing'
target_size = (224, 224)
random_state = 42
batch_size = 32
num_classes = 4
device = "cuda" if torch.cuda.is_available() else "cpu"
label_map = {
    'notumor': 0,        
    'glioma': 1,         
    'meningioma': 2,     
    'pituitary': 3       
}

In [37]:
categories = os.listdir(base_directory+'/'+train)
print(categories)

['meningioma', 'glioma', 'notumor', 'pituitary']


In [38]:
def create_dataset(path):
    my_list = []
    for category in categories:
        category_path = os.path.join(path, category)
        for file_name in os.listdir(category_path):
            file_path = os.path.join(category_path, file_name)
            # Ensure we're only adding image files
            if os.path.isfile(file_path) and file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                my_list.append([file_path, category])
    return pd.DataFrame(my_list, columns=['file_path', 'label'])

In [39]:
train_df = create_dataset(base_directory+'/'+train)
test_df = create_dataset(base_directory+'/'+test)

In [40]:
train_df['label'] = train_df['label'].map(label_map)
test_df['label'] = test_df['label'].map(label_map)

In [41]:
class ImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        
    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]
        label = self.dataframe.iloc[idx, 1]
        img = Image.open(img_path).convert('RGB')  

        if self.transform:
            img = self.transform(img)
            
        return img, label

In [42]:
train_transform = transforms.Compose([

   transforms.Resize(target_size),
   transforms.RandomHorizontalFlip(),
   transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
   transforms.ColorJitter(brightness=(0.8, 1.2)),
   transforms.RandomRotation(10),
   transforms.ToTensor(),
   transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [43]:
test_df_split, val_df_split = train_test_split(test_df, test_size=0.5, random_state=random_state)
# Reset indices for consistency
test_df_split = test_df_split.reset_index(drop=True)
val_df_split = val_df_split.reset_index(drop=True)

In [44]:
train_dataset = ImageDataset(train_df, transform=train_transform)
val_dataset = ImageDataset(val_df_split, transform=test_transform)
test_dataset = ImageDataset(test_df_split, transform=test_transform)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [45]:
# Load pretrained ResNet-18 model
model = models.resnet18(pretrained=True)



In [46]:
# Freeze all layers except the final fully connected layer
for param in model.parameters():
    param.requires_grad = False

In [47]:
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),  
    nn.ReLU(),                
    nn.Dropout(0.5),           
    nn.Linear(512, 4)          
)

In [48]:
# Move model to device
model = model.to(device)

In [49]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = 100 * correct_train / total_train

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_loss = val_loss / len(val_loader)
        val_acc = 100 * correct_val / total_val

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.2f}%")
        print("#" * 80)

    torch.save(model.state_dict(), f'best_brain_tumor_{name}.pth')

    return history

In [57]:
def test_model(model, test_loader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    
    correct = 0
    total = 0
    
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Collect for later analysis
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    test_acc = 100 * correct / total

    print(f"Test Accuracy: {test_acc:.2f}%\n")
    
    # Print classification report
    print("Classification Report:\n")
    print(classification_report(all_labels, all_preds, target_names=categories))

In [51]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)

In [52]:
print("Starting training...")
history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50, name='resnet18')

Starting training...
Epoch [1/50]
Train Loss: 0.6059, Train Accuracy: 76.16%
Val Loss: 0.4399, Val Accuracy: 82.62%
################################################################################
Epoch [2/50]
Train Loss: 0.4627, Train Accuracy: 81.83%
Val Loss: 0.4146, Val Accuracy: 83.99%
################################################################################
Epoch [3/50]
Train Loss: 0.4233, Train Accuracy: 84.35%
Val Loss: 0.3811, Val Accuracy: 83.54%
################################################################################
Epoch [4/50]
Train Loss: 0.4129, Train Accuracy: 84.17%
Val Loss: 0.3646, Val Accuracy: 85.82%
################################################################################
Epoch [5/50]
Train Loss: 0.3840, Train Accuracy: 85.21%
Val Loss: 0.3675, Val Accuracy: 86.13%
################################################################################
Epoch [6/50]
Train Loss: 0.3582, Train Accuracy: 86.40%
Val Loss: 0.3820, Val Accuracy: 85.06%
####

In [59]:
print("Loading model for testing...")
model.load_state_dict(torch.load('best_brain_tumor_resnet18.pth'))
test_model(model, test_loader)

Loading model for testing...
Test Accuracy: 92.21%

Classification Report:

              precision    recall  f1-score   support

  meningioma       0.97      0.99      0.98       213
      glioma       0.89      0.85      0.87       139
     notumor       0.84      0.86      0.85       152
   pituitary       0.97      0.95      0.96       151

    accuracy                           0.92       655
   macro avg       0.92      0.91      0.91       655
weighted avg       0.92      0.92      0.92       655



In [60]:
# Unfreeze layer4 and fc
for name, param in model.named_parameters():
    if 'layer4' in name or 'layer3' in name or 'fc' in name:
        param.requires_grad = True

In [61]:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5)

In [62]:
print("Starting training...")
history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, name='resnet18_finetuned')

Starting training...
Epoch [1/10]
Train Loss: 0.1997, Train Accuracy: 92.61%
Val Loss: 0.1911, Val Accuracy: 90.85%
################################################################################
Epoch [2/10]
Train Loss: 0.1568, Train Accuracy: 94.22%
Val Loss: 0.1642, Val Accuracy: 91.31%
################################################################################
Epoch [3/10]
Train Loss: 0.1419, Train Accuracy: 94.56%
Val Loss: 0.1389, Val Accuracy: 92.68%
################################################################################
Epoch [4/10]
Train Loss: 0.1173, Train Accuracy: 95.66%
Val Loss: 0.1413, Val Accuracy: 93.60%
################################################################################
Epoch [5/10]
Train Loss: 0.1130, Train Accuracy: 95.85%
Val Loss: 0.1121, Val Accuracy: 95.73%
################################################################################
Epoch [6/10]
Train Loss: 0.0946, Train Accuracy: 96.48%
Val Loss: 0.0930, Val Accuracy: 96.80%
####

In [64]:
print("Loading model for testing...")
model.load_state_dict(torch.load('best_brain_tumor_resnet18_finetuned.pth'))
test_model(model, test_loader)

Loading model for testing...
Test Accuracy: 97.10%

Classification Report:

              precision    recall  f1-score   support

  meningioma       0.99      1.00      0.99       213
      glioma       0.98      0.91      0.94       139
     notumor       0.92      0.97      0.95       152
   pituitary       0.99      0.99      0.99       151

    accuracy                           0.97       655
   macro avg       0.97      0.97      0.97       655
weighted avg       0.97      0.97      0.97       655

