In [None]:
import glob
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

In [None]:
# Input data
# ndmin=1 keeps subject_ids one-dimensional even if the file holds a single ID.
subject_ids = np.loadtxt("selected_samples_subset.txt", dtype=str, ndmin=1)
left_hippo_dir = "LeftCSV_subset/"
# glob.glob returns matches in arbitrary, OS-dependent order. Sorting makes the
# left/right lists deterministic and consistent with each other.
# NOTE(review): a later cell pairs these lists with subject_ids by position;
# that assumes sorted filename order equals the order of IDs in
# selected_samples_subset.txt -- confirm against the actual file names.
left_hippo_files = sorted(glob.glob(left_hippo_dir + "*"))
right_hippo_dir = "RightCSV_subset/"
right_hippo_files = sorted(glob.glob(right_hippo_dir + "*"))
# The CSV is read without a header row; its two columns are named ID and AD.
labels = pd.read_csv("adni_subset.csv", names=['ID', 'AD'])

In [None]:
# One-step pipeline: normalise a single channel with mean 0.5 and std 0.5.
_normalize_step = transforms.Normalize(mean=[0.5], std=[0.5])
transform = transforms.Compose([_normalize_step])

In [None]:
# Import Dataset
class HippocampusDataset(Dataset):
    """Paired left/right hippocampus feature matrices with optional labels.

    Every file is read eagerly in ``__init__`` as a header-less,
    single-space-separated CSV. Item ``idx`` pairs the ``idx``-th left file
    with the ``idx``-th right file and, when labels are given, with the
    ``idx``-th row (by position) of the label frame.

    Args:
        left_hippo_files: Sequence of paths to left-hippocampus CSV files.
        right_hippo_files: Sequence of paths to right-hippocampus CSV files,
            in the same subject order as ``left_hippo_files``.
        labels: Optional DataFrame with an ``'AD'`` column, positionally
            aligned with the file lists. ``None`` yields an unlabeled dataset.

    Raises:
        ValueError: If the two file lists differ in length.
    """

    # Lower bound on the per-channel std so constant channels do not divide by zero.
    _EPS = 1e-8

    def __init__(self, left_hippo_files, right_hippo_files, labels=None):
        if len(left_hippo_files) != len(right_hippo_files):
            raise ValueError(
                "left_hippo_files and right_hippo_files must have the same length "
                f"(got {len(left_hippo_files)} and {len(right_hippo_files)})"
            )
        self.left_hippo = [pd.read_csv(f, header=None, sep=" ").values for f in left_hippo_files]
        self.right_hippo = [pd.read_csv(f, header=None, sep=" ").values for f in right_hippo_files]
        # Only index into the frame when one was supplied, so that the
        # documented default (labels=None) works instead of raising TypeError.
        self.labels = labels['AD'] if labels is not None else None

    def __len__(self):
        """Return the number of subjects (one left/right pair each)."""
        return len(self.left_hippo)

    def __getitem__(self, idx):
        """Return the normalised sample for ``idx`` (and its label, if any).

        Returns:
            ``(sample, label)`` when labels were provided, otherwise ``sample``.
            ``sample`` is a float32 tensor of shape (channels, points), which
            is (14, 15000) for the inputs described in the assignment.
        """
        left = self.left_hippo[idx].astype(np.float32)
        right = self.right_hippo[idx].astype(np.float32)

        # Step 1: concatenate along the feature axis -> (points, left + right features)
        sample = np.concatenate((left, right), axis=1)

        # Step 2: transpose to channels-first -> (features, points), as Conv1d expects
        sample = sample.T

        # Step 3: convert to a float32 tensor (torch.tensor copies, so the
        # cached numpy arrays are never modified)
        sample = torch.tensor(sample, dtype=torch.float32)

        # Step 4: standardise each feature channel over its points
        mean = sample.mean(dim=1, keepdim=True)
        std = sample.std(dim=1, keepdim=True)
        # Clamp so a zero-variance channel becomes all zeros rather than NaN/inf.
        sample = (sample - mean) / std.clamp_min(self._EPS)

        if self.labels is not None:
            # Positional lookup: the label frame keeps its shuffled index after splitting.
            label = self.labels.iloc[idx]
            return sample, label
        else:
            return sample

In [None]:
# Updated Model Class using 1D Convolutions
class HippoCNN(pl.LightningModule):
    """1D CNN classifier: three conv+pool stages followed by two linear layers.

    Args:
        in_channels: Number of feature channels per sample (default 14).
        input_length: Number of points per channel (default 15000); used to
            size the first fully connected layer.
        num_classes: Number of output logits.
            NOTE(review): defaults to 2 on the assumption that the 'AD' label
            is binary -- pass the real class count if there are more classes.
    """

    def __init__(self, in_channels=14, input_length=15000, num_classes=2):
        super().__init__()
        self.input_length = input_length

        # Three unpadded, stride-1 convolutions with growing channel width.
        self.conv1 = nn.Conv1d(in_channels, 16, kernel_size=5)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=5)
        self.conv3 = nn.Conv1d(32, 64, kernel_size=5)
        # One pooling layer reused after every convolution. Kernel 4 (stride
        # defaults to the kernel size) keeps the flattened size modest:
        # 64 * 233 features for a 15000-point input.
        self.pool = nn.MaxPool1d(kernel_size=4)
        self.dropout = nn.Dropout(p=0.5)
        # fc1's input size is derived from the layers above, so it stays
        # correct if their kernel sizes or strides are changed.
        flat_features = self.conv3.out_channels * self.calculate_output_length()
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

        self.criterion = nn.CrossEntropyLoss()

    @staticmethod
    def _as_int(value):
        """Return the scalar from an int or a 1-tuple layer hyperparameter."""
        return value[0] if isinstance(value, (tuple, list)) else value

    def calculate_output_length(self, input_length=None):
        """Compute the sequence length after the conv/pool stack.

        Applies ``floor((length - (kernel_size - 1) - 1) / stride + 1)`` for
        every layer in the order used by ``forward`` (conv, pool, repeated
        three times), assuming padding=0 and dilation=1.

        Args:
            input_length: Starting length; defaults to ``self.input_length``.

        Returns:
            The per-channel length entering the flatten step.

        Raises:
            ValueError: If the input is too short for the layer stack.
        """
        length = self.input_length if input_length is None else input_length

        pool_spec = ('pool', self._as_int(self.pool.kernel_size), self._as_int(self.pool.stride))
        layers = []
        for conv in (self.conv1, self.conv2, self.conv3):
            layers.append(('conv', self._as_int(conv.kernel_size), self._as_int(conv.stride)))
            layers.append(pool_spec)

        for layer_type, kernel_size, stride in layers:
            length = (length - (kernel_size - 1) - 1) // stride + 1
            if length <= 0:
                raise ValueError(
                    f"input length is too short: non-positive length after a {layer_type} layer"
                )
        return length

    def forward(self, x):
        """Map a (batch, channels, length) tensor to (batch, num_classes) logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = torch.flatten(x, start_dim=1)  # keep the batch dim, flatten the rest
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        # No squeeze(): it would drop the batch dimension for a single-sample
        # batch and break argmax(dim=1) and CrossEntropyLoss downstream.
        return self.fc2(x)

    def _shared_step(self, batch, stage):
        """Run one batch, log ``{stage}_loss`` and ``{stage}_acc``, return the loss."""
        data, labels = batch
        outputs = self(data)
        loss = self.criterion(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = (preds == labels).float().mean()
        self.log(f'{stage}_loss', loss)
        self.log(f'{stage}_acc', acc)
        return loss

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss/accuracy; return the loss."""
        return self._shared_step(batch, 'train')

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss/accuracy."""
        self._shared_step(batch, 'val')

    def configure_optimizers(self):
        """Use Adam over all parameters with learning rate 1e-3."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [None]:
# Map raw class labels to contiguous integer indices BEFORE merging, so the
# merged frame carries the encoded labels.
unique_labels = sorted(set(labels['AD']))
label_to_index = {label: idx for idx, label in enumerate(unique_labels)}
labels['AD'] = labels['AD'].map(label_to_index)

# One row per subject; pairs each ID with its files by position.
# NOTE(review): this assumes subject_ids, left_hippo_files and right_hippo_files
# are in the same subject order -- confirm.
data_df = pd.DataFrame({
    'ID': subject_ids,
    'LeftFile': left_hippo_files,
    'RightFile': right_hippo_files
})

# Merge data and labels on ID
merged_df = pd.merge(data_df, labels, on='ID', how='inner')  # Keep only subjects with labels

# Extract the filtered lists AND the matching labels from the same merged
# frame. Splitting the unfiltered `labels` frame would misalign labels with
# files, or fail on inconsistent lengths whenever the merge drops or reorders rows.
filtered_left_files = merged_df['LeftFile'].tolist()
filtered_right_files = merged_df['RightFile'].tolist()
filtered_labels = merged_df[['ID', 'AD']].reset_index(drop=True)

# Split into train / validation / test sets (64% / 16% / 20% of the subjects)
train_left, test_left, train_right, test_right, train_labels, test_labels = train_test_split(
    filtered_left_files, filtered_right_files, filtered_labels, test_size=0.2, random_state=42
)
train_left, val_left, train_right, val_right, train_labels, val_labels = train_test_split(
    train_left, train_right, train_labels, test_size=0.2, random_state=42
)

In [None]:
# Wrap the training and validation splits in datasets.
train_dataset = HippocampusDataset(
    left_hippo_files=train_left,
    right_hippo_files=train_right,
    labels=train_labels,
)
val_dataset = HippocampusDataset(
    left_hippo_files=val_left,
    right_hippo_files=val_right,
    labels=val_labels,
)

# Batches of 8; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8)

# Model with its default constructor arguments.
model = HippoCNN()

In [None]:
# HippocampusDataset.__init__ accepts only (left_hippo_files, right_hippo_files,
# labels); passing `transform` as a fourth positional argument raised TypeError.
# Per-channel normalisation is the dataset's own job (step 4 of its
# preprocessing), so the extra argument is dropped.
val_dataset = HippocampusDataset(val_left, val_right, val_labels)

In [None]:
# Display the first left-hippocampus file path assigned to the validation split
# (quick sanity check of the split).
val_left[0]

In [None]:
# Lightning trainer capped at 50 epochs.
trainer = pl.Trainer(
    max_epochs=50,
)

# Fit the model on the training loader, with the validation loader for evaluation.
trainer.fit(
    model,
    train_loader,
    val_loader,
)

In [None]:
# Held-out test split. The loader is created without shuffle=True, so batches
# follow the order of test_labels.
test_dataset = HippocampusDataset(
    left_hippo_files=test_left,
    right_hippo_files=test_right,
    labels=test_labels,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=32)

In [None]:
# Inference on Test Set
model.eval()  # switch dropout to inference behaviour
test_preds = []
with torch.no_grad():
    for batch in test_loader:
        data, _ = batch  # Unpack the batch; labels are ignored
        # Put the inputs on the model's device; predictions are moved back to
        # the CPU below, so inputs need the mirror-image step.
        data = data.to(model.device)
        outputs = model(data)
        # Restore the batch dimension if the model returned a 1-D logits
        # vector (can happen for a single-sample batch when forward()
        # squeezes its output); argmax(dim=1) needs (batch, classes).
        if outputs.dim() == 1:
            outputs = outputs.unsqueeze(0)
        predictions = torch.argmax(outputs, dim=1)
        test_preds.extend(predictions.cpu().numpy())


In [None]:
# Compute Metrics

# Ground-truth class indices for the test split, in the same order as test_preds.
labels_list = test_labels['AD']

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Weighted averaging weights each class's score by its support.
accuracy = accuracy_score(labels_list, test_preds)
precision = precision_score(labels_list, test_preds, average='weighted')
recall = recall_score(labels_list, test_preds, average='weighted')
f1 = f1_score(labels_list, test_preds, average='weighted')
# NOTE(review): test_preds holds hard class predictions, so this ROC curve has
# a single operating point. With more than two classes roc_auc_score needs
# per-class probabilities of shape (n_samples, n_classes) and rejects a 1-D
# prediction vector -- confirm the task is binary or pass softmax scores.
roc_auc = roc_auc_score(labels_list, test_preds, average='weighted', multi_class='ovr')

print(f'Test Accuracy: {accuracy:.4f}')
print(f'Test Precision: {precision:.4f}')
print(f'Test Recall: {recall:.4f}')
print(f'Test F1-Score: {f1:.4f}')
# Labelled "Test": the score is computed on the test split, not the validation split.
print(f'Test ROC-AUC: {roc_auc:.4f}')
