In [None]:
import json
import random
from pprint import pprint

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from torchsummary import summary
from tqdm import trange

In [2]:
# Colab-only step: mount Google Drive at /content/drive so the dataset archive
# stored under MyDrive can be read by the unzip cell that follows.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Extract the preprocessed feature archive from the mounted Drive into the
# current working directory (creates ./preprocessed_selected_features/).
# -o overwrites existing files without asking, so re-running this cell cannot
# stall on unzip's interactive "replace?" prompt.
!unzip -o '/content/drive/MyDrive/School/2025 Spring/Advanced ML/AML Project/Data/new/preprocessed_selected_features.zip'

Archive:  /content/drive/MyDrive/School/2025 Spring/Advanced ML/AML Project/Data/new/preprocessed_selected_features.zip
   creating: preprocessed_selected_features/
   creating: preprocessed_selected_features/test/
   creating: preprocessed_selected_features/train/
  inflating: preprocessed_selected_features/test/connectome_matrices.csv  
  inflating: preprocessed_selected_features/test/aux.csv  
  inflating: preprocessed_selected_features/train/labels.csv  
  inflating: preprocessed_selected_features/train/connectome_matrices.csv  
  inflating: preprocessed_selected_features/train/aux.csv  


In [4]:
def compute_leaderboard_f1_multiclass(y_true, y_pred):
    """
    Leaderboard metric for the joint 4-class encoding of (ADHD, Sex_F).

    Class ids are read as two bits, ``class = 2 * ADHD + Sex_F``:
        0 -> [ADHD=0, Sex_F=0]
        1 -> [ADHD=0, Sex_F=1]
        2 -> [ADHD=1, Sex_F=0]
        3 -> [ADHD=1, Sex_F=1]

    Parameters:
    - y_true: array-like of true class ids.
    - y_pred: array-like of predicted class ids, same length as y_true.

    Returns:
    - the mean of two binary F1 scores:
        (1) ADHD F1, where samples whose true label is ADHD=1 & Sex_F=1
            count twice
        (2) Sex_F F1 (unweighted)
    """
    actual = np.asarray(y_true)
    guessed = np.asarray(y_pred)

    # divmod by 2 splits each class id into its (ADHD, Sex_F) bits.
    adhd_actual, female_actual = np.divmod(actual, 2)
    adhd_guessed, female_guessed = np.divmod(guessed, 2)

    # Weights depend only on the TRUE labels: female ADHD cases get weight 2.
    is_female_adhd = (adhd_actual == 1) & (female_actual == 1)
    sample_weights = np.where(is_female_adhd, 2, 1)

    adhd_f1 = f1_score(
        adhd_actual, adhd_guessed, sample_weight=sample_weights, average='binary'
    )
    sex_f1 = f1_score(female_actual, female_guessed)

    return (adhd_f1 + sex_f1) / 2

In [5]:
# Sanity check of the metric on a toy example: only the third sample is wrong
# (true class 2, predicted 3). Its ADHD bit is still right but its Sex_F bit is
# flipped, so the ADHD F1 stays perfect and only the Sex_F F1 drops.
y_true_temp = [0, 1, 2, 3]
y_pred_temp = [0, 1, 3, 3]
compute_leaderboard_f1_multiclass(y_true_temp, y_pred_temp)

0.9

In [6]:
# Use the GPU when the runtime exposes one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [7]:
# Locations of the extracted training files (relative to the working directory).
TRAIN_DIR = './preprocessed_selected_features/train'
TRAIN_X_PATH = TRAIN_DIR + '/aux.csv'        # feature table
TRAIN_Y_PATH = TRAIN_DIR + '/labels.csv'     # label table

In [8]:
# Load features and labels, each indexed by participant_id.
train_X_df = pd.read_csv(TRAIN_X_PATH).set_index('participant_id')
train_y_df = pd.read_csv(TRAIN_Y_PATH).set_index('participant_id')

# Put the labels in the same row order as the features, because later cells
# convert both frames to arrays and slice them positionally.
train_y_df = train_y_df.reindex(train_X_df.index)

# reindex() fills rows for participants it cannot find with NaN instead of
# failing; those NaNs would later be cast into meaningless class ids, so stop
# here if any label is missing.
unlabeled_ids = train_y_df.index[train_y_df.isna().any(axis=1)]
if len(unlabeled_ids) > 0:
    raise ValueError(
        f"{len(unlabeled_ids)} participant(s) in the feature table have missing labels, "
        f"e.g. {list(unlabeled_ids[:5])}"
    )

In [9]:
# Preview the first rows of the feature table (indexed by participant_id).
train_X_df.head()

Unnamed: 0_level_0,Barratt_Barratt_P1_Occ_20.0,Basic_Demos_Enroll_Year_2019,Barratt_Barratt_P1_Occ_35.0,Barratt_Barratt_P2_Occ_10.0,APQ_P_APQ_P_PM,Barratt_Barratt_P1_Edu_12.0,Barratt_Barratt_P2_Edu_21.0,Barratt_Barratt_P2_Edu_9.0,Barratt_Barratt_P1_Edu_15.0,Barratt_Barratt_P1_Edu_18.0,...,SDQ_SDQ_Prosocial,APQ_P_APQ_P_INV,Barratt_Barratt_P2_Edu_18.0,Barratt_Barratt_P1_Occ_5.0,APQ_P_APQ_P_OPD,Basic_Demos_Study_Site_3,Barratt_Barratt_P2_Edu_12.0,SDQ_SDQ_Peer_Problems,Barratt_Barratt_P2_Occ_45.0,Barratt_Barratt_P2_Occ_20.0
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00aIpNTbG5uh,0,1,0,0,0.46875,0,1,0,0,0,...,0.9,0.8,0,0,0.333333,0,0,0.222222,1,0
00fV0OyyoLfw,0,0,0,0,0.71875,0,1,0,0,0,...,0.8,0.5,0,0,0.857143,0,0,0.333333,1,0
04X1eiS79T4B,0,0,0,0,0.65625,0,1,0,0,0,...,0.7,0.566667,0,0,0.52381,0,0,0.777778,1,0
05ocQutkURd6,0,0,0,0,0.46875,0,0,0,0,1,...,0.6,0.733333,1,0,0.380952,0,0,0.222222,0,0
06YUNBA9ZRLq,0,0,0,0,0.21875,1,1,0,0,0,...,0.4,0.5,0,0,0.714286,0,0,0.111111,1,0


In [10]:
# Preview the first rows of the label table, now in the same row order as the features.
train_y_df.head()

Unnamed: 0_level_0,ADHD_Outcome,Sex_F
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1
00aIpNTbG5uh,1,0
00fV0OyyoLfw,1,0
04X1eiS79T4B,0,1
05ocQutkURd6,0,1
06YUNBA9ZRLq,1,0


In [11]:
class Model(nn.Module):
    """Fully connected classifier: repeated Linear -> ReLU [-> Dropout] blocks
    followed by a final Linear layer that produces one raw score per class.

    Args:
        input_dim: number of input features.
        layer_dims: iterable of hidden-layer widths, in order; may be empty.
        dropout: dropout probability after each hidden activation; Dropout
            layers are omitted entirely when this is not greater than 0.
        output_dim: number of output scores (classes).
    """

    def __init__(self, input_dim, layer_dims, dropout=0.5, output_dim=4):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_dim, *layer_dims]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
            if dropout > 0:
                stack.append(nn.Dropout(dropout))
        # Output head: no activation, the loss is applied to these raw scores.
        stack.append(nn.Linear(widths[-1], output_dim))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the raw class scores."""
        return self.layers(x)

In [12]:
# Smoke-test the architecture on a throwaway 100-feature model and print its
# layer-by-layer table; this model is not used for training.
temp_model = Model(
    input_dim=100, layer_dims=[64, 32], dropout=0.3, output_dim=4
).to(device)
summary(temp_model, (100,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]           6,464
              ReLU-2                   [-1, 64]               0
           Dropout-3                   [-1, 64]               0
            Linear-4                   [-1, 32]           2,080
              ReLU-5                   [-1, 32]               0
           Dropout-6                   [-1, 32]               0
            Linear-7                    [-1, 4]             132
Total params: 8,676
Trainable params: 8,676
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.03
Estimated Total Size (MB): 0.04
----------------------------------------------------------------


In [13]:
# Feature matrix as a float32 array (one row per participant), ready for torch.tensor().
X = train_X_df.to_numpy().astype(np.float32)

In [14]:
# Dimensions of the feature matrix as (rows, columns).
X.shape

(1213, 59)

In [15]:
# Select the two label columns by name so the encoding below cannot silently
# swap ADHD and Sex_F if the column order of labels.csv ever changes.
y_two_vars = train_y_df[['ADHD_Outcome', 'Sex_F']].values

# Fold both binary labels into a single class id: class = 2 * ADHD_Outcome + Sex_F
#   0 -> (0, 0), 1 -> (0, 1), 2 -> (1, 0), 3 -> (1, 1)
# compute_leaderboard_f1_multiclass decodes this same layout.
y = np.array(y_two_vars[:, 0] * 2 + y_two_vars[:, 1], dtype=np.uint8)

In [16]:
# Spot-check the encoding: first three class ids next to the label rows they were built from.
y[:3], y_two_vars[:3]

(array([2, 2, 1], dtype=uint8),
 array([[1, 0],
        [1, 0],
        [0, 1]]))

In [17]:
def _halving_widths(widest, narrowest):
    """Return [widest, widest // 2, ..., narrowest], halving at every step."""
    widths = []
    width = widest
    while width >= narrowest:
        widths.append(width)
        width //= 2
    return widths


# Architectures to search: every candidate halves its width layer by layer,
# so each one is fully described by its first and last hidden width.
layer_dims_list = [
    _halving_widths(widest, narrowest)
    for widest, narrowest in [
        (64, 16),
        (128, 32),
        (128, 16),
        (256, 32),
        (256, 16),
        (512, 64),
        (512, 32),
        (512, 16),
    ]
]

# Dropout rates to search: 0.0 through 0.5 in steps of 0.1.
dropouts = [tenths / 10 for tenths in range(6)]

In [18]:
# Shared loss for every training run below; it is applied to the model's raw
# 4-way outputs and the integer class ids stored in `y`.
criterion = nn.CrossEntropyLoss()

In [19]:
# Fix every random number generator the notebook relies on (Python, NumPy,
# PyTorch CPU and all CUDA devices) so a full re-run gives the same numbers.
seed = 42  # any fixed integer works
for apply_seed in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,  # only relevant when CUDA is in use
):
    apply_seed(seed)

In [20]:
# 5-fold cross-validation splitter shared by every configuration in the grid
# search. KFold lives in sklearn.model_selection and must be imported in the
# notebook's import cell, otherwise this cell raises NameError on a fresh kernel.
# Shuffling uses the notebook-wide `seed`, so each kf.split(X) call yields the
# same folds and all configurations are compared on identical splits.
kf = KFold(n_splits=5, shuffle=True, random_state=seed)

In [21]:
# Grid search: every (architecture, dropout) pair is trained from scratch on
# each cross-validation fold with full-batch Adam for `num_epochs` epochs.
#
# For each fold we record the leaderboard F1 at the epoch with the lowest loss
# on the held-out fold, plus that epoch's index.
# NOTE(review): the held-out fold is used both to pick the epoch and to report
# the score, so the recorded F1 values are optimistic estimates.
num_epochs = 200

# Both dicts are keyed "w1-w2-...": {dropout: [one value per fold]}.
results = {"-".join(map(str, layer_dims)): {} for layer_dims in layer_dims_list}
epoch_history = {arch_key: {} for arch_key in results}

for layer_dims in layer_dims_list:
    arch_key = "-".join(map(str, layer_dims))
    for dropout in dropouts:
        print("complexity:", layer_dims, "dropout rate:", dropout)
        f1_scores = []
        best_epochs = []

        for train_index, test_index in kf.split(X):
            model = Model(
                input_dim=X.shape[1], layer_dims=layer_dims, dropout=dropout, output_dim=4
            ).to(device)
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # The fold's data never changes between epochs: build the tensors
            # and move them to the device once instead of on every epoch.
            X_train_t = torch.tensor(X_train).to(device)
            y_train_t = torch.tensor(y_train).to(device)
            X_test_t = torch.tensor(X_test).to(device)
            y_test_t = torch.tensor(y_test).to(device)

            optimizer = optim.Adam(model.parameters(), lr=0.001)
            best_test_loss = float("inf")
            best_f1 = 0.0
            best_epoch = 0

            for epoch in range(num_epochs):
                # One full-batch gradient step on the training folds.
                model.train()
                optimizer.zero_grad()
                outputs = model(X_train_t)
                loss = criterion(outputs, y_train_t)
                loss.backward()
                optimizer.step()

                # Score the held-out fold with dropout disabled.
                model.eval()
                with torch.no_grad():
                    test_outputs = model(X_test_t)
                    test_loss = criterion(test_outputs, y_test_t).item()
                    predicted = torch.argmax(test_outputs, 1).cpu()
                    f1 = compute_leaderboard_f1_multiclass(y_test, predicted)

                # Keep the F1 of the epoch with the lowest held-out loss
                # (the loss, not the F1 itself, decides which epoch is "best").
                if test_loss < best_test_loss:
                    best_test_loss = test_loss
                    best_f1 = f1
                    best_epoch = epoch

            f1_scores.append(float(best_f1))
            best_epochs.append(best_epoch)

        results[arch_key][dropout] = f1_scores
        epoch_history[arch_key][dropout] = best_epochs

complexity: [64, 32, 16] dropout rate: 0.0
complexity: [64, 32, 16] dropout rate: 0.1
complexity: [64, 32, 16] dropout rate: 0.2
complexity: [64, 32, 16] dropout rate: 0.3
complexity: [64, 32, 16] dropout rate: 0.4
complexity: [64, 32, 16] dropout rate: 0.5
complexity: [128, 64, 32] dropout rate: 0.0
complexity: [128, 64, 32] dropout rate: 0.1
complexity: [128, 64, 32] dropout rate: 0.2
complexity: [128, 64, 32] dropout rate: 0.3
complexity: [128, 64, 32] dropout rate: 0.4
complexity: [128, 64, 32] dropout rate: 0.5
complexity: [128, 64, 32, 16] dropout rate: 0.0
complexity: [128, 64, 32, 16] dropout rate: 0.1
complexity: [128, 64, 32, 16] dropout rate: 0.2
complexity: [128, 64, 32, 16] dropout rate: 0.3
complexity: [128, 64, 32, 16] dropout rate: 0.4
complexity: [128, 64, 32, 16] dropout rate: 0.5
complexity: [256, 128, 64, 32] dropout rate: 0.0
complexity: [256, 128, 64, 32] dropout rate: 0.1
complexity: [256, 128, 64, 32] dropout rate: 0.2
complexity: [256, 128, 64, 32] dropout rate

In [22]:
# Pretty-print the nested {architecture: {dropout: [per-fold F1]}} results.
# JSON object keys are strings, so the float dropout keys appear as "0.0", "0.1", ...
results_json = json.dumps(results, indent=4)
print(results_json)

{
    "64-32-16": {
        "0.0": [
            0.5900410494378012,
            0.4706040992448759,
            0.5151345755693582,
            0.4634476979197476,
            0.48510101010101014
        ],
        "0.1": [
            0.5831570312162687,
            0.4170731707317073,
            0.5580569587817557,
            0.4200913242009132,
            0.4467849223946785
        ],
        "0.2": [
            0.4421768707482993,
            0.4176904176904177,
            0.44467640918580376,
            0.46093514328808444,
            0.47141746566467996
        ],
        "0.3": [
            0.4416475972540046,
            0.433441148228635,
            0.44025157232704404,
            0.42417582417582417,
            0.5709750566893425
        ],
        "0.4": [
            0.4396355353075171,
            0.4673446796734468,
            0.44516129032258067,
            0.4380252642433617,
            0.43595505617977526
        ],
        "0.5": [
            0.4431818

In [23]:
# Flatten the nested grid-search results to one entry per configuration,
# keyed "<architecture>-<dropout>" (e.g. "64-32-16-0.1"), and write them out.
full_results = {}          # config -> per-fold F1 scores
summary_results = {}       # config -> mean F1 across folds
final_epoch_history = {}   # config -> per-fold best epoch

for arch_key, per_dropout in results.items():
    for dropout, fold_scores in per_dropout.items():
        config_key = f"{arch_key}-{dropout}"
        full_results[config_key] = fold_scores
        summary_results[config_key] = float(np.mean(fold_scores))
        final_epoch_history[config_key] = epoch_history[arch_key][dropout]

# Rank configurations by mean F1, best first (ties keep their original order).
summary_results = dict(sorted(summary_results.items(), key=lambda item: item[1], reverse=True))
keys = list(summary_results.keys())
# List the epoch history in the same ranking order as the summary.
final_epoch_history = {key: final_epoch_history[key] for key in keys}


# Write the flattened per-fold scores. Previously the nested `results` dict was
# dumped here and `full_results` was never used, so this file's keys did not
# match the other two files.
with open("full_results.json", "w") as f:
    json.dump(full_results, f, indent=4)


with open("summary_results.json", "w") as f:
    json.dump(summary_results, f, indent=4)


with open("epoch_history.json", "w") as f:
    json.dump(final_epoch_history, f, indent=4)

In [24]:
# Configuration used for the final model. These values are hard-coded here,
# not read from the grid-search results — presumably picked by hand from the
# saved summaries; re-check them if the search is re-run.
best_layer_dims = [256, 128, 64, 32, 16]
best_dropout = 0.4
n_epochs = 80

In [25]:
# Train the final model on the complete training set with the chosen
# configuration. The model is deliberately left on the CPU: the cells below
# feed it CPU tensors, so it is not moved to `device` here.
model = Model(
    input_dim=X.shape[1], layer_dims=best_layer_dims, dropout=best_dropout, output_dim=4
)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The data is identical on every epoch, so convert it to tensors once
# instead of rebuilding them inside the loop.
X_full_t = torch.tensor(X)
y_full_t = torch.tensor(y)

model.train()
for epoch in trange(n_epochs):
    # One full-batch gradient step per epoch.
    optimizer.zero_grad()
    outputs = model(X_full_t)
    loss = criterion(outputs, y_full_t)
    loss.backward()
    optimizer.step()

100%|██████████| 80/80 [00:01<00:00, 68.32it/s]


In [26]:
# Save the trained weights. The filename is derived from the chosen
# configuration ("<w1>-<w2>-...-<dropout>.pth", i.e. "256-128-64-32-16-0.4.pth"
# for the current settings) so it cannot drift from the config cell.
weights_path = f"{'-'.join(map(str, best_layer_dims))}-{best_dropout}.pth"
torch.save(model.state_dict(), weights_path)

In [27]:
# Score the final model on X / y — the same arrays it was just trained on —
# so this is a training-set score, not an estimate of held-out performance.
model.eval()  # disable dropout for inference
with torch.no_grad():
    yhat = model(torch.tensor(X))
predicted = yhat.argmax(dim=1)  # class id with the highest score per row
f1 = compute_leaderboard_f1_multiclass(y_true=y, y_pred=predicted)
print(f1)

0.42492138364779874
