In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary
from pprint import pprint
import json
import random
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from tqdm import trange, tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip '/content/drive/MyDrive/School/2025 Spring/Advanced ML/AML Project/Data/new/preprocessed_selected_features.zip'

Archive:  /content/drive/MyDrive/School/2025 Spring/Advanced ML/AML Project/Data/new/preprocessed_selected_features.zip
   creating: preprocessed_selected_features/
   creating: preprocessed_selected_features/test/
   creating: preprocessed_selected_features/train/
  inflating: preprocessed_selected_features/test/connectome_matrices.csv  
  inflating: preprocessed_selected_features/test/aux.csv  
  inflating: preprocessed_selected_features/train/labels.csv  
  inflating: preprocessed_selected_features/train/connectome_matrices.csv  
  inflating: preprocessed_selected_features/train/aux.csv  


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
TRAIN_X_PATH = 'preprocessed_selected_features/train/connectome_matrices.csv'
TRAIN_Y_PATH = 'preprocessed_selected_features/train/labels.csv'

In [6]:
train_X_df = pd.read_csv(TRAIN_X_PATH)
train_y_df = pd.read_csv(TRAIN_Y_PATH)
train_X_df.set_index('participant_id', inplace=True)
train_y_df.set_index('participant_id', inplace=True)
train_y_df = train_y_df.reindex(train_X_df.index)

In [7]:
train_X_df.head()

Unnamed: 0_level_0,0throw_1thcolumn,0throw_2thcolumn,0throw_3thcolumn,0throw_4thcolumn,0throw_5thcolumn,0throw_6thcolumn,0throw_7thcolumn,0throw_8thcolumn,0throw_9thcolumn,0throw_10thcolumn,...,195throw_196thcolumn,195throw_197thcolumn,195throw_198thcolumn,195throw_199thcolumn,196throw_197thcolumn,196throw_198thcolumn,196throw_199thcolumn,197throw_198thcolumn,197throw_199thcolumn,198throw_199thcolumn
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
70z8Q2xdTXM3,0.270399,0.580746,0.485892,0.064059,0.617299,0.337467,0.55023,-0.087434,0.579197,0.535364,...,0.305246,0.58361,0.5409,0.228036,0.343643,0.485184,0.019701,0.614717,0.52442,0.40429
WHWymJu6zNZi,0.745668,0.635297,0.560712,0.541223,0.439375,0.473556,0.129684,-0.095509,0.132499,0.314387,...,0.295154,-0.021363,0.000563,-0.119118,0.511165,0.396962,0.201877,0.664817,0.612853,0.557002
4PAQp1M6EyAo,-0.141711,0.503933,0.294476,0.697041,0.840358,0.476623,0.690517,0.215647,0.573338,0.43098,...,0.464666,-0.031043,-0.048386,0.092712,0.464166,0.337855,0.471782,0.50496,0.566427,0.691008
obEacy4Of68I,0.242208,0.829234,0.74409,0.627094,0.756269,0.699015,0.565864,0.463884,0.584879,0.295275,...,0.140506,-0.261833,0.269811,-0.023052,0.490709,0.684443,0.26087,0.373375,0.490076,0.617905
s7WzzDcmDOhF,0.275725,0.675102,0.702433,0.613503,0.804479,0.637771,0.28674,0.404671,0.33113,0.377236,...,-0.223803,0.010372,-0.154615,-0.602806,0.55511,-0.248578,0.25421,0.061103,0.132521,0.119855


In [8]:
train_y_df.head()

Unnamed: 0_level_0,ADHD_Outcome,Sex_F
participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1
70z8Q2xdTXM3,1,0
WHWymJu6zNZi,1,1
4PAQp1M6EyAo,1,1
obEacy4Of68I,1,1
s7WzzDcmDOhF,1,1


In [9]:
class Model(nn.Module):
    def __init__(self, input_dim, layer_dims, dropout=0.5, output_dim=4):
        super(Model, self).__init__()
        layers = []
        prev_dim = input_dim
        for dim in layer_dims:
            layers.append(nn.Linear(prev_dim, dim))
            layers.append(nn.ReLU())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            prev_dim = dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

In [10]:
temp_model = Model(100, [64, 32], dropout=0.3, output_dim=4).to(device)
summary(temp_model, (100,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]           6,464
              ReLU-2                   [-1, 64]               0
           Dropout-3                   [-1, 64]               0
            Linear-4                   [-1, 32]           2,080
              ReLU-5                   [-1, 32]               0
           Dropout-6                   [-1, 32]               0
            Linear-7                    [-1, 4]             132
Total params: 8,676
Trainable params: 8,676
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.03
Estimated Total Size (MB): 0.04
----------------------------------------------------------------


In [11]:
X = np.array(train_X_df.values, dtype=np.float32)

In [12]:
X.shape

(1213, 19900)

In [13]:
y_two_vars = train_y_df.values
y = np.array(y_two_vars[:, 0] * 2 + y_two_vars[:, 1], dtype=np.uint8)

In [14]:
y[:3], y_two_vars[:3]

(array([2, 3, 3], dtype=uint8),
 array([[1, 0],
        [1, 1],
        [1, 1]]))

In [None]:
layer_dims_list = [
    [64, 32, 16],
    [128, 64, 32],
    [128, 64, 32, 16],
    [256, 128, 64, 32],
    [256, 128, 64, 32, 16],
    [512, 256, 128, 64],
    [512, 256, 128, 64, 32],
    [512, 256, 128, 64, 32, 16],
]
dropouts = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

In [16]:
criterion = nn.CrossEntropyLoss()

In [17]:
seed = 42  # Choose any fixed number
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # If using CUDA

In [18]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [19]:
num_epochs = 200

results = {"-".join(map(str, layer_dims)): {} for layer_dims in layer_dims_list}
epoch_history = {"-".join(map(str, layer_dims)): {} for layer_dims in layer_dims_list}
for layer_dims in layer_dims_list:
    for dropout in dropouts:
        print("complexity:", layer_dims, "dropout rate:", dropout)
        f1_scores = []
        best_epochs = []

        for fold, (train_index, test_index) in enumerate(kf.split(X)):
            model = Model(
                input_dim=X.shape[1], layer_dims=layer_dims, dropout=dropout, output_dim=4
            ).to(device)
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            optimizer = optim.Adam(model.parameters(), lr=0.001)
            best_test_loss = float("inf")
            best_f1 = 0.0
            best_epoch = 0

            for epoch in trange(num_epochs):
                model.train()
                optimizer.zero_grad()
                outputs = model(torch.tensor(X_train).to(device))
                loss = criterion(outputs, torch.tensor(y_train).to(device))
                loss.backward()
                optimizer.step()

                model.eval()
                with torch.no_grad():
                    test_outputs = model(torch.tensor(X_test).to(device))
                    test_loss = criterion(test_outputs, torch.tensor(y_test).to(device)).item()
                    predicted = torch.argmax(test_outputs.data, 1).cpu()
                    f1 = f1_score(y_test, predicted, average="macro")

                if test_loss < best_test_loss:
                    best_test_loss = test_loss
                    best_f1 = f1
                    best_epoch = epoch


            f1_scores.append(float(best_f1))
            best_epochs.append(best_epoch)
        print(f1_scores)
        results["-".join(map(str, layer_dims))][dropout] = f1_scores
        epoch_history["-".join(map(str, layer_dims))][dropout] = best_epochs

complexity: [64, 32, 16] dropout rate: 0.0


100%|██████████| 200/200 [00:16<00:00, 11.81it/s]
100%|██████████| 200/200 [00:16<00:00, 12.31it/s]
100%|██████████| 200/200 [00:16<00:00, 12.19it/s]
100%|██████████| 200/200 [00:17<00:00, 11.72it/s]
100%|██████████| 200/200 [00:16<00:00, 11.81it/s]


[0.18174100941503318, 0.1558073654390935, 0.1725067385444744, 0.16056338028169015, 0.2526053074384982]
complexity: [128, 64, 32] dropout rate: 0.0


100%|██████████| 200/200 [00:16<00:00, 11.89it/s]
100%|██████████| 200/200 [00:17<00:00, 11.57it/s]
100%|██████████| 200/200 [00:17<00:00, 11.50it/s]
100%|██████████| 200/200 [00:16<00:00, 11.78it/s]
100%|██████████| 200/200 [00:17<00:00, 11.68it/s]


[0.16960317460317462, 0.19521286356546472, 0.2100162726198324, 0.1601123595505618, 0.26139988196725783]
complexity: [128, 64, 32, 16] dropout rate: 0.0


100%|██████████| 200/200 [00:17<00:00, 11.29it/s]
100%|██████████| 200/200 [00:16<00:00, 11.85it/s]
100%|██████████| 200/200 [00:16<00:00, 11.97it/s]
100%|██████████| 200/200 [00:17<00:00, 11.36it/s]
100%|██████████| 200/200 [00:17<00:00, 11.41it/s]


[0.22595870206489677, 0.25507243516362055, 0.1725067385444744, 0.23682966286718182, 0.16574585635359115]
complexity: [256, 128, 64, 32] dropout rate: 0.0


100%|██████████| 200/200 [00:17<00:00, 11.22it/s]
100%|██████████| 200/200 [00:18<00:00, 11.05it/s]
100%|██████████| 200/200 [00:17<00:00, 11.30it/s]
100%|██████████| 200/200 [00:18<00:00, 10.93it/s]
100%|██████████| 200/200 [00:17<00:00, 11.27it/s]


[0.16568732193732194, 0.2556390977443609, 0.2575507679620658, 0.2293488601452311, 0.2501594896331738]
complexity: [256, 128, 64, 32, 16] dropout rate: 0.0


100%|██████████| 200/200 [00:17<00:00, 11.45it/s]
100%|██████████| 200/200 [00:18<00:00, 10.83it/s]
100%|██████████| 200/200 [00:17<00:00, 11.25it/s]
100%|██████████| 200/200 [00:17<00:00, 11.30it/s]
100%|██████████| 200/200 [00:17<00:00, 11.20it/s]


[0.15482954545454544, 0.1558073654390935, 0.21235412474849094, 0.1954022988505747, 0.2129996852376456]
complexity: [512, 256, 128, 64] dropout rate: 0.0


100%|██████████| 200/200 [00:19<00:00, 10.51it/s]
100%|██████████| 200/200 [00:19<00:00, 10.12it/s]
100%|██████████| 200/200 [00:19<00:00, 10.34it/s]
100%|██████████| 200/200 [00:20<00:00,  9.64it/s]
100%|██████████| 200/200 [00:20<00:00,  9.60it/s]


[0.23004199432942815, 0.2630141521145411, 0.27838468836428976, 0.16835016835016836, 0.19693743028090457]
complexity: [512, 256, 128, 64, 32] dropout rate: 0.0


100%|██████████| 200/200 [00:20<00:00,  9.80it/s]
100%|██████████| 200/200 [00:20<00:00,  9.82it/s]
100%|██████████| 200/200 [00:19<00:00, 10.12it/s]
100%|██████████| 200/200 [00:20<00:00,  9.74it/s]
100%|██████████| 200/200 [00:20<00:00,  9.95it/s]


[0.2291883963494133, 0.18638850889192887, 0.2576592553799276, 0.22006613296206057, 0.2597485697079193]
complexity: [512, 256, 128, 64, 32, 16] dropout rate: 0.0


100%|██████████| 200/200 [00:20<00:00,  9.79it/s]
100%|██████████| 200/200 [00:19<00:00, 10.09it/s]
100%|██████████| 200/200 [00:20<00:00,  9.79it/s]
100%|██████████| 200/200 [00:19<00:00, 10.01it/s]
100%|██████████| 200/200 [00:20<00:00,  9.82it/s]

[0.15482954545454544, 0.1558073654390935, 0.2759221772379667, 0.1601123595505618, 0.16574585635359115]





In [20]:
results_json = json.dumps(results, indent=4)
print(results_json)

{
    "64-32-16": {
        "0.0": [
            0.18174100941503318,
            0.1558073654390935,
            0.1725067385444744,
            0.16056338028169015,
            0.2526053074384982
        ]
    },
    "128-64-32": {
        "0.0": [
            0.16960317460317462,
            0.19521286356546472,
            0.2100162726198324,
            0.1601123595505618,
            0.26139988196725783
        ]
    },
    "128-64-32-16": {
        "0.0": [
            0.22595870206489677,
            0.25507243516362055,
            0.1725067385444744,
            0.23682966286718182,
            0.16574585635359115
        ]
    },
    "256-128-64-32": {
        "0.0": [
            0.16568732193732194,
            0.2556390977443609,
            0.2575507679620658,
            0.2293488601452311,
            0.2501594896331738
        ]
    },
    "256-128-64-32-16": {
        "0.0": [
            0.15482954545454544,
            0.1558073654390935,
            0.212354124748

In [21]:
full_results = {}
summary_results = {}
final_epoch_history = {}

for layer_dims in results.keys():
    for dropout in results[layer_dims].keys():
        full_results[layer_dims+'-'+str(dropout)] = results[layer_dims][dropout]
        summary_results[layer_dims+'-'+str(dropout)] = float(np.mean(results[layer_dims][dropout]))
        final_epoch_history[layer_dims+'-'+str(dropout)] = epoch_history[layer_dims][dropout]

summary_results = dict(sorted(summary_results.items(), key=lambda item: item[1], reverse=True))
keys = list(summary_results.keys())
final_epoch_history = dict(sorted(final_epoch_history.items(), key=lambda item: keys.index(item[0]), reverse=False))


with open("full_results.json", "w") as f:
    json.dump(results, f, indent=4)


with open("summary_results.json", "w") as f:
    json.dump(summary_results, f, indent=4)


with open("epoch_history.json", "w") as f:
    json.dump(final_epoch_history, f, indent=4)

In [23]:
best_layer_dims, best_dropout, n_epochs = [256,128,64,32], 0, 20

In [24]:
model = Model(
    input_dim=X.shape[1], layer_dims=best_layer_dims, dropout=best_dropout, output_dim=4
)
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.train()
for epoch in trange(n_epochs):
    optimizer.zero_grad()
    outputs = model(torch.tensor(X))
    loss = criterion(outputs, torch.tensor(y))
    loss.backward()
    optimizer.step()

100%|██████████| 20/20 [00:06<00:00,  2.94it/s]


In [25]:
torch.save(model.state_dict(), "256-128-64-32-0.0.pth")

In [26]:
model.eval()
with torch.no_grad():
    yhat = model(torch.tensor(X))
predicted = torch.argmax(yhat, 1)
f1 = f1_score(y, predicted, average="macro")
print(f1)

0.34458596554316834
