In [1]:
# Imports
import os
import sys
import glob
import torch
import torchvision

import numpy as np
import datetime as dt
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

from PIL import Image
from collections import Counter  # Add this import
from torch.utils.data import Dataset
from torch.autograd import Variable
from torch.optim import lr_scheduler

from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, datasets, models
from os import listdir, makedirs, getcwd, remove
from os.path import isfile, join, abspath, exists, isdir, expanduser




In [2]:
# Set the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# pip install numpy==1.23.5


In [4]:
from torchvision.datasets import ImageFolder


# Modify the data transformation for grayscale images
transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Grayscale(num_output_channels=3),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485], std=[0.229])  # Use single value for mean and std
])

train_dataset = ImageFolder('./data/original/Training', transform=transforms)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

val_dataset = ImageFolder('./data/original/Testing', transform=transforms)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)


In [5]:
# Compute class weights
def compute_class_weights(labels):
    label_counts = Counter(labels)
    class_weights = [1.0 / label_counts[label] for label in labels]
    return torch.tensor(class_weights, dtype=torch.float)

class Tumor(Dataset):
    def __init__(self, path, transform=None):
        self.classes = os.listdir(path)
        self.path = [f"{path}/{className}" for className in self.classes]
        self.file_list = [glob.glob(f"{x}/*") for x in self.path]
        self.transform = transform

        files = []
        for i, className in enumerate(self.classes):
            for fileName in self.file_list[i]:
                files.append([i, fileName])
        self.file_list = files

        # Compute class weights
        labels = [item[0] for item in self.file_list]
        self.class_weights = compute_class_weights(labels)

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        fileName = self.file_list[idx][1]
        classCategory = self.file_list[idx][0]
        im = Image.open(fileName)

        if self.transform:
            im = self.transform(im)

        return im, classCategory

In [6]:
data_path = "/kaggle/input/brain-tumor-mri-dataset"
train_path = join(data_path, "Training")
test_path = join(data_path,"Testing")

In [7]:
train_data = Tumor(train_path, transform=transforms)
test_data = Tumor(test_path, transform=transforms)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/brain-tumor-mri-dataset/Training'

In [None]:
# step 3
# Load a batch of images and labels for visualization
data_iter = iter(train_loader)
images, labels = next(data_iter)

# Convert images to numpy arrays and denormalize
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
images = (images.numpy().transpose((0, 2, 3, 1)) * std + mean).clip(0, 1)

# Create a grid of images
num_images = len(images)
rows = int(np.ceil(num_images / 4))
fig, axes = plt.subplots(rows, 4, figsize=(15, 15))

# Plot images with labels
for i, ax in enumerate(axes.flat):
    if i < num_images:
        ax.imshow(images[i])
        ax.set_title(f'Label: {train_dataset.classes[labels[i]]}')
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

validation_split = 0.2
random_seed = 42
shuffle_dataset = True  # Define shuffle_dataset variable here

# Creating data indices for training and validation splits
dataset_size = len(train_data)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, val_indices = indices[split:], indices[:split]

In [None]:
batch_size=16

In [None]:
from torch.utils.data import SubsetRandomSampler, DataLoader

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(train_data, batch_size=batch_size, sampler=val_sampler)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
# Step 4: Define the ResNet-50 based classifier
class TumorClassifier(nn.Module):
    def __init__(self, num_classes):
        super(TumorClassifier, self).__init__()
        self.resnet50 = resnet50(pretrained=True)
        self.resnet50.fc = nn.Linear(self.resnet50.fc.in_features, num_classes)
        
    def forward(self, x):
        return self.resnet50(x)


In [None]:
!pip install efficientnet-pytorch


In [None]:
from efficientnet_pytorch import EfficientNet

# Step 4: Define the EfficientNet-based classifier
class TumorClassifier(nn.Module):
    def __init__(self, num_classes):
        super(TumorClassifier, self).__init__()
        self.efficientnet = EfficientNet.from_pretrained('efficientnet-b0')  # You can choose different variants
        num_ftrs = self.efficientnet._fc.in_features
        self.efficientnet._fc = nn.Linear(num_ftrs, num_classes)
        
    def forward(self, x):
        return self.efficientnet(x)

In [None]:
from torchvision.models import densenet121

# Step 4: Define the DenseNet-based classifier
class TumorClassifier(nn.Module):
    def __init__(self, num_classes):
        super(TumorClassifier, self).__init__()
        self.densenet = densenet121(pretrained=True)
        num_ftrs = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(num_ftrs, num_classes)
        
    def forward(self, x):
        return self.densenet(x)

In [None]:
import torch.optim as optim
# Create the model, move it to the device, define loss function and optimizer
model = TumorClassifier(num_classes=4)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0002)


In [None]:
import numpy as np

def train(model, criterion, train_loader, val_loader, optimizer, num_epochs, patience):
    """Train a model with early stopping."""
   

    # Exponential moving average of the loss.
    ema_loss = None

    # Variables for early stopping
    best_loss = np.inf
    epochs_without_improvement = 0

    print('----- Training Loop -----')

    # Loop over epochs.
    for epoch in range(num_epochs):
        # Training phase
        model.train()

        # Loop over training data.
        for batch_idx, (features, target) in enumerate(train_loader):
            # Move data to the device.
            features = features.to(device)
            target = target.to(device)

            # Forward pass.
            output = model(features)
            loss = criterion(output, target)

            # Backward pass.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update exponential moving average of the loss.
            if ema_loss is None:
                ema_loss = loss.item()
            else:
                ema_loss += (loss.item() - ema_loss) * 0.01

        # Print training progress at the end of the epoch.
        print('Epoch: {} \tTraining Loss: {:.3f}'.format(epoch, ema_loss))

        # Validation phase
        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for val_data in val_loader:
                val_inputs, val_labels = val_data
                val_inputs = val_inputs.to(device)
                val_labels = val_labels.to(device)

                val_outputs = model(val_inputs)
                loss = criterion(val_outputs, val_labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)

        # Check if validation loss has improved
        if val_loss < best_loss:
            best_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        


In [None]:
# Train the model
num_epochs = 20
patience = 3
train(model, criterion, train_loader, val_loader, optimizer, num_epochs, patience)


In [None]:
def test(model, data_loader):
    """Measures the accuracy of a model on a data set."""
    # Make sure the model is in evaluation mode.
    model.eval()
    correct = 0
    total = 0

    print('----- Model Evaluation -----')
    # We do not need to maintain intermediate activations while testing.
    with torch.no_grad():
        # Loop over test data.
        for features, target in data_loader:
            # Move data to the device.
            features = features.to(device)
            target = target.to(device)

            # Forward pass.
            output = model(features)

            # Get the label corresponding to the highest predicted probability.
            _, predicted = torch.max(output.data, 1)

            # Count number of correct predictions.
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # Calculate test accuracy.
    accuracy = 100 * correct / total
    print(f'Test accuracy: {correct} / {total} ({accuracy:.2f}%)')

    return accuracy


In [None]:
test(model, val_loader)

In [None]:
# Set the model to evaluation mode
model.eval()

# Make predictions on the test images
predictions = []
test_image_fileNames = []

# Loop over test data.
for image, label in test_loader:
    # Move data to the device.
    image = image.to(device)
    
    # Perform the prediction on the batch of images
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output.data, 1)
        
    # Convert the predicted labels to class names
    predicted_classes = [train_dataset.classes[p.item()] for p in predicted]
    
    # Append the predicted classes and image names to the lists
    predictions.extend(predicted_classes)
    test_image_fileNames.extend(label)

# Print the predictions for each image
for image_name, prediction in zip(test_image_fileNames, predictions):
    print(f"Image: {image_name}, Predicted Class: {prediction}")