In [None]:
# Mount Google Drive (Colab only) so that the /content/drive/MyDrive paths
# used by the dataset and checkpoint code in later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import torch
import pickle
import json
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from datetime import datetime
from sklearn import metrics
from torchvision import models
from torch.autograd import Variable
from pathlib import Path
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Preprocessing constants consumed by Dataset.__getitem__.
# Side length, in pixels, that every slice is center-cropped to.
INPUT_DIM = 224
# Upper bound of the intensity range after per-volume min-max rescaling.
MAX_PIXEL_VAL = 255
# Offset and scale used to standardize the rescaled intensities.
# NOTE(review): presumably dataset-wide statistics on the 0-255 scale — confirm provenance.
MEAN = 58.09
STDDEV = 49.73

In [None]:
class Dataset(data.Dataset):
    """Pickled MRI volumes paired with binary labels taken from a metadata CSV.

    Every file found in ``datadirs`` becomes one sample. Its label is looked
    up by file name in the metadata CSV and binarized as
    ``int(diagnosis_column > diagnosis)``.

    Args:
        datadirs: Iterable of directories containing pickled volumes.
        diagnosis: Threshold; metadata values strictly greater than it are
            labelled 1, everything else 0.
        use_gpu: Stored on the instance; read by the training loop to decide
            whether batches are moved to CUDA.
        metadata_path: Optional path of the metadata CSV. Defaults to
            ``DEFAULT_METADATA_PATH`` (the location originally hard-coded).

    Raises:
        KeyError: If a file in ``datadirs`` has no row in the metadata CSV.
    """

    # Location that was previously hard-coded in __init__.
    DEFAULT_METADATA_PATH = '/content/drive/MyDrive/fastmri_data/kneeMRI/metadata.csv'

    def __init__(self, datadirs, diagnosis, use_gpu, metadata_path=None):
        super().__init__()
        self.use_gpu = use_gpu

        if metadata_path is None:
            metadata_path = self.DEFAULT_METADATA_PATH

        # Load metadata. The context manager closes the handle, which the
        # previous bare open(...).readlines() never did.
        label_dict = {}
        with open(metadata_path) as metadata_file:
            next(metadata_file, None)  # Skip the header
            for line in metadata_file:
                if not line.strip():
                    continue  # tolerate blank / trailing lines
                fields = line.strip().split(',')
                file_name = fields[10]  # Assumed: column 10 holds the file name
                label = fields[2]       # Assumed: column 2 holds the diagnosis
                label_dict[file_name] = int(int(label) > diagnosis)

        # Populate dataset paths. os.listdir returns entries in arbitrary
        # order, so sort to keep the sample order reproducible between runs.
        self.paths = [
            os.path.join(directory, entry)
            for directory in datadirs
            for entry in sorted(os.listdir(directory))
        ]

        # Ensure every path has a metadata row (matched on the bare file name).
        self.labels = []
        for path in self.paths:
            file_name = os.path.basename(path)
            if file_name not in label_dict:
                raise KeyError(f"File {file_name} not found in metadata!")
            self.labels.append(label_dict[file_name])

        # Class weights for the weighted loss: a sample with label y gets
        # weights[y], i.e. negatives are weighted by the positive fraction and
        # positives by the negative fraction, so the rarer class counts more.
        neg_weight = np.mean(self.labels)
        self.weights = [neg_weight, 1 - neg_weight]

    def weighted_loss(self, prediction, target):
        """Class-weighted binary cross-entropy on raw logits.

        Args:
            prediction: Logits tensor.
            target: Tensor of 0/1 labels, broadcast-compatible with
                ``prediction``.

        Returns:
            Scalar loss tensor (mean over elements).
        """
        # Build the weights on the target's own device and with the target's
        # shape, so the loss is correct for any batch size and cannot hit a
        # CPU/GPU mismatch.
        class_weights = torch.tensor(
            [float(w) for w in self.weights],
            dtype=prediction.dtype,
            device=target.device,
        )
        sample_weights = class_weights[target.long()]
        return F.binary_cross_entropy_with_logits(prediction, target, weight=sample_weights)

    @staticmethod
    def _center_crop(vol, size):
        """Return the central ``size`` x ``size`` window of every slice in ``vol``."""
        height, width = vol.shape[1], vol.shape[2]
        if height < size or width < size:
            raise ValueError(
                f"Volume slices of size {height}x{width} are smaller than the "
                f"required crop size {size}"
            )
        top = (height - size) // 2
        left = (width - size) // 2
        # Explicit end indices: the old `pad:-pad` slicing returned an empty
        # array when pad == 0 and an off-by-one size for odd differences.
        return vol[:, top:top + size, left:left + size]

    @staticmethod
    def _preprocess(vol):
        """Crop, rescale to [0, MAX_PIXEL_VAL], standardize and replicate to 3 channels."""
        vol = Dataset._center_crop(vol, INPUT_DIM).astype(np.float64)
        low, high = np.min(vol), np.max(vol)
        span = high - low
        if span == 0:
            # Constant volume: avoid 0/0 -> NaN and map everything to zero.
            vol = np.zeros_like(vol)
        else:
            vol = (vol - low) / span * MAX_PIXEL_VAL
        vol = (vol - MEAN) / STDDEV
        # Duplicate the single channel three times: (slices, 3, H, W).
        return np.stack((vol,) * 3, axis=1)

    def __getitem__(self, index):
        """Load one volume and return ``(volume_tensor, label_tensor)``.

        The volume tensor has shape ``(slices, 3, INPUT_DIM, INPUT_DIM)`` and
        the label tensor has shape ``(1,)``.
        """
        path = self.paths[index]
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this dataset with files from a trusted source.
        with open(path, 'rb') as file_handler:
            vol = pickle.load(file_handler).astype(np.int32)

        vol_tensor = torch.FloatTensor(self._preprocess(vol))
        label_tensor = torch.FloatTensor([self.labels[index]])
        return vol_tensor, label_tensor

    def __len__(self):
        """Number of volumes in the dataset."""
        return len(self.paths)

In [None]:
def load_data(diagnosis, use_gpu=False,
              base_path='/content/drive/MyDrive/fastmri_data/kneeMRI',
              num_workers=None):
    """Build the train / validation / test DataLoaders.

    The split is fixed by folder name: six ``volXX`` folders for training,
    two for validation and two for testing. Only the training loader is
    shuffled; all loaders use ``batch_size=1`` (one volume per batch).

    Args:
        diagnosis: Label threshold forwarded to ``Dataset``.
        use_gpu: Forwarded to ``Dataset``.
        base_path: Directory that contains the ``volXX`` folders. Defaults to
            the original Google Drive location. It only controls where the
            volume folders are looked up; ``Dataset`` locates the metadata
            file itself.
        num_workers: Worker processes per loader. ``None`` (default) uses up
            to 8 but never more than the CPUs available, which avoids
            oversubscribing small hosts.

    Returns:
        Tuple ``(train_loader, valid_loader, test_loader)``.
    """
    if num_workers is None:
        num_workers = min(8, os.cpu_count() or 1)

    def _dirs(names):
        return [os.path.join(base_path, dir_name) for dir_name in names]

    train_dirs = _dirs(['vol08', 'vol04', 'vol03', 'vol09', 'vol06', 'vol07'])
    valid_dirs = _dirs(['vol10', 'vol05'])
    test_dirs = _dirs(['vol01', 'vol02'])

    train_dataset = Dataset(train_dirs, diagnosis, use_gpu)
    valid_dataset = Dataset(valid_dirs, diagnosis, use_gpu)
    test_dataset = Dataset(test_dirs, diagnosis, use_gpu)

    train_loader = data.DataLoader(train_dataset, batch_size=1, num_workers=num_workers, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size=1, num_workers=num_workers, shuffle=False)
    test_loader = data.DataLoader(test_dataset, batch_size=1, num_workers=num_workers, shuffle=False)

    return train_loader, valid_loader, test_loader

In [None]:
class MRNet(nn.Module):
    """Volume-level binary classifier on top of an ImageNet-pretrained AlexNet.

    Each slice of a volume is passed through AlexNet's convolutional
    ``features``, spatially average-pooled to a 256-vector, and the slice
    vectors are max-pooled into one vector that a linear layer maps to a
    single logit.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are part of the checkpoint format (state_dict keys).
        self.model = models.alexnet(pretrained=True)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(256, 1)

    def forward(self, x):
        """Map a batch holding one volume to a ``(1, 1)`` logit tensor."""
        # Drop the leading batch dimension so the slices act as the batch.
        slices = x.squeeze(dim=0)
        feature_maps = self.model.features(slices)
        # Global average pool per slice, flattened to (num_slices, channels).
        pooled = self.gap(feature_maps)
        per_slice = pooled.view(feature_maps.size(0), -1)
        # Element-wise maximum over the slice axis summarizes the volume.
        volume_vector, _ = per_slice.max(dim=0, keepdim=True)
        return self.classifier(volume_vector)

In [None]:
def run_model(model, loader, train=False, optimizer=None):
    """Run one pass over ``loader``, optionally updating the model.

    Args:
        model: Module mapping a batch to a ``(1, 1)`` logit tensor.
        loader: Iterable of ``(volume, label)`` batches whose ``dataset``
            exposes ``use_gpu`` and ``weighted_loss``.
        train: If True, put the model in training mode and take an optimizer
            step per batch; otherwise run in eval mode without gradients.
        optimizer: Optimizer to step; required when ``train`` is True.

    Returns:
        Tuple ``(avg_loss, auc, preds, labels)``: mean per-batch loss, ROC
        AUC, and the per-sample sigmoid scores and labels.

    Raises:
        ValueError: If ``train`` is True but no optimizer was given, or if the
            loader yields no batches.
    """
    if train and optimizer is None:
        raise ValueError("optimizer must be provided when train=True")

    if train:
        model.train()
    else:
        model.eval()

    dataset = loader.dataset
    preds, labels = [], []
    total_loss, num_batches = 0.0, 0

    for vol, label in loader:
        if dataset.use_gpu:
            vol, label = vol.cuda(), label.cuda()

        if train:
            optimizer.zero_grad()

        # Only track gradients when they are needed; evaluation no longer
        # builds an autograd graph for every volume.
        with torch.set_grad_enabled(train):
            logit = model(vol)  # call the module (not .forward) so hooks run
            loss = dataset.weighted_loss(logit, label)

        if train:
            loss.backward()
            optimizer.step()

        total_loss += loss.item()
        preds.append(torch.sigmoid(logit).detach().cpu().numpy()[0][0])
        labels.append(label.detach().cpu().numpy()[0][0])
        num_batches += 1

    if num_batches == 0:
        raise ValueError("loader yielded no batches; cannot compute loss or AUC")

    avg_loss = total_loss / num_batches
    fpr, tpr, _ = metrics.roc_curve(labels, preds)
    auc = metrics.auc(fpr, tpr)
    return avg_loss, auc, preds, labels

def evaluate(split, model_path, diagnosis, use_gpu):
    """Load a saved MRNet checkpoint and report loss and AUC on one split.

    Args:
        split: One of ``'train'``, ``'valid'`` or ``'test'``.
        model_path: Path to a state_dict written by ``torch.save``.
        diagnosis: Label threshold forwarded to ``load_data``.
        use_gpu: Whether to load and run the model on CUDA.
    """
    loaders = dict(zip(('train', 'valid', 'test'), load_data(diagnosis, use_gpu)))

    # Restore the weights directly onto the device they will run on.
    device_name = 'cuda' if use_gpu else 'cpu'
    net = MRNet()
    net.load_state_dict(torch.load(model_path, map_location=device_name))
    if use_gpu:
        net = net.cuda()

    chosen_loader = loaders[split]
    loss, auc, _preds, _labels = run_model(net, chosen_loader)
    for metric_name, value in (('Loss', loss), ('AUC', auc)):
        print(f"{split} {metric_name}: {value:.4f}")

def train(diagnosis, epochs, learning_rate, use_gpu, rundir):
    """Train MRNet and write checkpoints into ``rundir``.

    After every epoch the model is evaluated on the validation split. Each
    time the validation loss improves, a checkpoint named
    ``best_val_loss_<loss>_epoch_<n>.pth`` is written. The weights after the
    last epoch are always saved as ``injury_detection_model.pth``.

    Args:
        diagnosis: Label threshold forwarded to ``load_data``.
        epochs: Number of passes over the training split.
        learning_rate: Adam learning rate.
        use_gpu: Whether to train on CUDA.
        rundir: Output directory for checkpoints; created if missing.
    """
    # Create the output directory up front so a missing folder fails (or is
    # fixed) before training starts rather than at the first checkpoint.
    rundir = Path(rundir)
    rundir.mkdir(parents=True, exist_ok=True)

    train_loader, valid_loader, _ = load_data(diagnosis, use_gpu)
    model = MRNet().cuda() if use_gpu else MRNet()
    optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=0.01)
    # Cut the learning rate to 30% after 5 epochs without validation improvement.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.3, threshold=1e-4)

    best_val_loss = float('inf')

    for epoch in range(epochs):
        train_loss, train_auc, _, _ = run_model(model, train_loader, train=True, optimizer=optimizer)
        val_loss, val_auc, _, _ = run_model(model, valid_loader)

        print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Train AUC: {train_auc:.4f}")
        print(f"Validation Loss: {val_loss:.4f}, Validation AUC: {val_auc:.4f}")

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            file_name = f"best_val_loss_{val_loss:.4f}_epoch_{epoch+1}.pth"
            save_path = rundir / file_name
            # Each improvement is written to its own file; earlier
            # checkpoints are not removed.
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved at epoch {epoch+1} with validation loss {val_loss:.4f}")

    final_model_path = rundir / 'injury_detection_model.pth'
    torch.save(model.state_dict(), final_model_path)
    print(f"Final model saved at {final_model_path}")

In [None]:
# Kick off training. diagnosis=0 labels every metadata value greater than 0
# as positive; checkpoints are written into the same Drive folder that holds
# the dataset.
train(diagnosis=0, epochs=10, learning_rate=1e-5, use_gpu=torch.cuda.is_available(), rundir='/content/drive/MyDrive/fastmri_data/kneeMRI')

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:02<00:00, 88.8MB/s]


Epoch 1: Train Loss: 0.2640, Train AUC: 0.5265
Validation Loss: 0.2280, Validation AUC: 0.6671
Best model saved at epoch 1 with validation loss 0.2280




Epoch 2: Train Loss: 0.2420, Train AUC: 0.6884
Validation Loss: 0.2146, Validation AUC: 0.7291
Best model saved at epoch 2 with validation loss 0.2146




Epoch 3: Train Loss: 0.2249, Train AUC: 0.7754
Validation Loss: 0.2037, Validation AUC: 0.7560
Best model saved at epoch 3 with validation loss 0.2037




Epoch 4: Train Loss: 0.2076, Train AUC: 0.8197
Validation Loss: 0.1972, Validation AUC: 0.7663
Best model saved at epoch 4 with validation loss 0.1972




Epoch 5: Train Loss: 0.1833, Train AUC: 0.8728
Validation Loss: 0.1902, Validation AUC: 0.7838
Best model saved at epoch 5 with validation loss 0.1902




Epoch 6: Train Loss: 0.1661, Train AUC: 0.9027
Validation Loss: 0.1730, Validation AUC: 0.8327
Best model saved at epoch 6 with validation loss 0.1730




Epoch 7: Train Loss: 0.1510, Train AUC: 0.9258
Validation Loss: 0.1790, Validation AUC: 0.8458




Epoch 8: Train Loss: 0.1326, Train AUC: 0.9508
Validation Loss: 0.1620, Validation AUC: 0.8601
Best model saved at epoch 8 with validation loss 0.1620




Epoch 9: Train Loss: 0.1204, Train AUC: 0.9579
Validation Loss: 0.1559, Validation AUC: 0.8675
Best model saved at epoch 9 with validation loss 0.1559




Epoch 10: Train Loss: 0.0998, Train AUC: 0.9791
Validation Loss: 0.1514, Validation AUC: 0.8868
Best model saved at epoch 10 with validation loss 0.1514
Final model saved at /content/drive/MyDrive/fastmri_data/kneeMRI/injury_detection_model.pth
