In [25]:
# Imports
import os
import random
import numpy as np
import pandas as pd
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.metrics import confusion_matrix, classification_report

In [26]:
# Function to reset random seeds
def reset_random_seeds(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

In [27]:
# Custom Dataset Class
class MRIDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y.astype(int)  # Convert labels to integers
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32).permute(2, 0, 1), torch.tensor(self.y[idx], dtype=torch.long)

In [28]:
class MRIModel(nn.Module):
    def __init__(self):
        super(MRIModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 100, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.dropout1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv2d(100, 50, kernel_size=3)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.dropout2 = nn.Dropout(0.3)
        self.fc = nn.Linear(50 * 16 * 16, 3)  # Adjust the input size based on the output size after convolutions
        
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.pool1(x)
        x = self.dropout1(x)
        x = self.conv2(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.pool2(x)
        x = self.dropout2(x)
        x = torch.flatten(x, 1)  # Flatten for fully connected layer
        x = self.fc(x)
        return x

In [29]:
# Load and preprocess data
path = "../data/processed/mri/"

def load_data():
    with open(f"{path}/img_train.pkl", "rb") as fh:
        data = pickle.load(fh)
    X_train_ = pd.DataFrame(data)["img_array"]

    with open(f"{path}/img_test.pkl", "rb") as fh:
        data = pickle.load(fh)
    X_test_ = pd.DataFrame(data)["img_array"]

    with open(f"{path}/img_y_train.pkl", "rb") as fh:
        data = pickle.load(fh)
    y_train = np.array(pd.DataFrame(data)["label"].values.astype(np.float32)).flatten()

    with open(f"{path}/img_y_test.pkl", "rb") as fh:
        data = pickle.load(fh)
    y_test = np.array(pd.DataFrame(data)["label"].values.astype(np.float32)).flatten()

    y_train = np.where(y_train == 2, -1, y_train)
    y_train = np.where(y_train == 1, 2, y_train)
    y_train = np.where(y_train == -1, 1, y_train)

    y_test = np.where(y_test == 2, -1, y_test)
    y_test = np.where(y_test == 1, 2, y_test)
    y_test = np.where(y_test == -1, 1, y_test)

    X_train = np.array([X for X in X_train_.values])
    X_test = np.array([X for X in X_test_.values])

    return X_train, X_test, y_train, y_test

In [None]:
# Train and evaluate the model
def train_and_evaluate(seed, model, train_loader, test_loader, device, best_acc, best_cm_path):
    reset_random_seeds(seed)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 50

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        if epoch % (num_epochs/10) == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Loss: {train_loss/len(train_loader):.4f}")

    # Evaluation
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, preds = torch.max(output, 1)
            y_true.extend(target.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Classification report
    cr = classification_report(y_true, y_pred, output_dict=True)
    acc = cr["accuracy"]
    print(f"Seed {seed} - Accuracy: {acc}")
    
    # Update best model accuracy and save its confusion matrix
    if acc > best_acc:
        best_acc = acc
        cm = confusion_matrix(y_true, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=[0, 1, 2], yticklabels=[0, 1, 2])
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(f'Confusion Matrix (Seed {seed}, Accuracy: {acc:.2f})')
        plt.savefig(best_cm_path)
        plt.close()  # Close the figure to avoid displaying in notebook
    
    return cr, best_acc

In [38]:
# Main driver code
X_train, X_test, y_train, y_test = load_data()

train_dataset = MRIDataset(X_train, y_train)
test_dataset = MRIDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MRIModel()

seeds = random.sample(range(1, 200), 5)
acc, precision, recall, f1 = [], [], [], []

best_accuracy = 0
best_cm_path = "../outputs/mri/mri_confusion_matrix.png"


for seed in seeds:
    cr, best_accuracy = train_and_evaluate(seed, model, train_loader, test_loader, device, best_accuracy, best_cm_path)
    acc.append(cr["accuracy"])
    precision.append(cr["macro avg"]["precision"])
    recall.append(cr["macro avg"]["recall"])
    f1.append(cr["macro avg"]["f1-score"])

print("Avg Accuracy:", np.mean(acc))
print("Avg Precision:", np.mean(precision))
print("Avg Recall:", np.mean(recall))
print("Avg F1:", np.mean(f1))

Epoch 1/50, Loss: 1.1496
Epoch 6/50, Loss: 1.0519
Epoch 11/50, Loss: 1.0432
Epoch 16/50, Loss: 0.9650
Epoch 21/50, Loss: 0.8559
Epoch 26/50, Loss: 0.7536
Epoch 31/50, Loss: 0.6712
Epoch 36/50, Loss: 0.6947
Epoch 41/50, Loss: 0.5192
Epoch 46/50, Loss: 0.4520
Seed 174 - Accuracy: 0.9210526315789473
Epoch 1/50, Loss: 0.4565
Epoch 6/50, Loss: 0.3574
Epoch 11/50, Loss: 0.3272
Epoch 16/50, Loss: 0.3126
Epoch 21/50, Loss: 0.2712
Epoch 26/50, Loss: 0.2291
Epoch 31/50, Loss: 0.2686
Epoch 36/50, Loss: 0.2276
Epoch 41/50, Loss: 0.2101
Epoch 46/50, Loss: 0.2066
Seed 12 - Accuracy: 0.9473684210526315
Epoch 1/50, Loss: 0.1880
Epoch 6/50, Loss: 0.2017
Epoch 11/50, Loss: 0.1381
Epoch 16/50, Loss: 0.1362
Epoch 21/50, Loss: 0.1695
Epoch 26/50, Loss: 0.1756
Epoch 31/50, Loss: 0.1492
Epoch 36/50, Loss: 0.1339
Epoch 41/50, Loss: 0.1077
Epoch 46/50, Loss: 0.1199
Seed 134 - Accuracy: 0.9473684210526315
Epoch 1/50, Loss: 0.1407
Epoch 6/50, Loss: 0.1181
Epoch 11/50, Loss: 0.1258
Epoch 16/50, Loss: 0.1347
Epoch