## Load the required libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, Dataset, DataLoader
from torch.utils.data import Dataset
from torch_geometric.nn import GCNConv, global_mean_pool

## Load & Explore the Data

From the `TRAIN_NEW` folder, we will load:
- Functional MRI connectome data
- Quantitative metadata (e.g., test scores)
- Categorical metadata (e.g., demographics)
- Targets: ADHD diagnosis & Sex

In [3]:
# === Load TRAIN data ===
train_path = "/Users/Haley/Desktop/WiDs Datathon/widsdatathon2025/TRAIN_NEW"
connectome_train = pd.read_csv(f"{train_path}/TRAIN_FUNCTIONAL_CONNECTOME_MATRICES_new_36P_Pearson.csv")
quant_meta_train = pd.read_excel(f"{train_path}/TRAIN_QUANTITATIVE_METADATA_new.xlsx")
cat_meta_train = pd.read_excel(f"{train_path}/TRAIN_CATEGORICAL_METADATA_new.xlsx")
targets_train = pd.read_excel(f"{train_path}/TRAINING_SOLUTIONS.xlsx")


# Check shapes
print("Train Connectome:", connectome_train.shape)
print("Train Quantitative metadata:", quant_meta_train.shape)
print("Train Categorical metadata:", cat_meta_train.shape)
print("Train Targets:", targets_train.shape)

# === Load TEST data ===
test_path = "/Users/Haley/Desktop/WiDs Datathon/widsdatathon2025/TEST"
connectome_test = pd.read_csv(f"{test_path}/TEST_FUNCTIONAL_CONNECTOME_MATRICES.csv")
quant_meta_test = pd.read_excel(f"{test_path}/TEST_QUANTITATIVE_METADATA.xlsx")
cat_meta_test = pd.read_excel(f"{test_path}/TEST_CATEGORICAL.xlsx")


# Check shapes
print("Test Connectome:", connectome_test.shape)
print("Test Quantitative metadata:", quant_meta_test.shape)
print("Test Categorical metadata:", cat_meta_test.shape)

Train Connectome: (1213, 19901)
Train Quantitative metadata: (1213, 19)
Train Categorical metadata: (1213, 10)
Train Targets: (1213, 3)
Test Connectome: (304, 19901)
Test Quantitative metadata: (304, 19)
Test Categorical metadata: (304, 10)


In [4]:
def flatten_to_square_matrix(flattened_fcm, size=200):
    # Ensure the length of the flattened matrix corresponds to the upper triangular part of a 192x192 matrix
    num_elements = len(flattened_fcm)
    expected_elements = (size * (size - 1)) // 2

    if num_elements != expected_elements:
        raise ValueError(f"Flattened matrix size mismatch. Expected {expected_elements} elements, got {num_elements}")

    # Initialize a square matrix (size x size) filled with zeros
    matrix = np.zeros((size, size))

    # Extract the upper triangular indices (i, j) where i < j
    indices = np.triu_indices(size, k=1)  # k=1 excludes diagonal (i != j)

    # Assign the flattened values to the upper triangular part of the matrix
    matrix[indices] = flattened_fcm
    matrix.T[indices] = flattened_fcm  # Symmetric part: Copy to the lower triangle

    return matrix

# Example for the first participant
flattened_fcm = connectome_train.iloc[0, 1:].values  # Skip the participant_id column
fcm_matrix = flatten_to_square_matrix(flattened_fcm)
print(fcm_matrix.shape)  # Should print (200, 200)

(200, 200)


In [5]:
# Assuming train_FCM is a pandas DataFrame with participant IDs and flattened FCMs
connectivity_matrices = []

for i in range(len(connectome_train)):
    flattened_fcm = connectome_train.iloc[i, 1:].values  # Skip the participant_id column
    fcm_matrix = flatten_to_square_matrix(flattened_fcm)
    connectivity_matrices.append(fcm_matrix)

connectivity_matrices = np.array(connectivity_matrices)
print("Train Connectomes:",connectivity_matrices.shape)  # Should print (N, 200, 200), where N is the number of participants


connectivity_matrices_test = []

for i in range(len(connectome_test)):
    flattened_fcm = connectome_test.iloc[i, 1:].values  # Skip the participant_id column
    fcm_matrix = flatten_to_square_matrix(flattened_fcm)
    connectivity_matrices_test.append(fcm_matrix)

connectivity_matrices_test = np.array(connectivity_matrices_test)
print("Test Connectomes:",connectivity_matrices_test.shape) 

Train Connectomes: (1213, 200, 200)
Test Connectomes: (304, 200, 200)


Add channel: inputs of shape (batch_size, 1, d, d) — that "1" is because CNNs expect a channel dimension (like a color channel in images)

In [6]:
# --- 3. Metadata (numeric) ---
# Socio-demographic and questionnaire data (after preprocessing, e.g., mean-imputation + one-hot encoding)
# Shape: (n_subjects, n_features)
train_metadata = pd.read_csv("/Users/Haley/Desktop/WiDs Datathon/WiDs Notebooks/meta_train.csv")
train_metadata = train_metadata.drop(columns = ['participant_id'])
print("Train metadata:",train_metadata.shape)
# Same for test set
test_metadata = pd.read_csv("/Users/Haley/Desktop/WiDs Datathon/WiDs Notebooks/meta_test.csv")
test_metadata = test_metadata.drop(columns = ['participant_id'])
print("Test metadata:",test_metadata.shape)
print("Columns:",train_metadata.columns)


Train metadata: (1213, 33)
Test metadata: (304, 33)
Columns: Index(['PreInt_Demos_Fam_Child_Ethnicity', 'PreInt_Demos_Fam_Child_Race',
       'MRI_Track_Scan_Location', 'Barratt_Barratt_P1_Edu',
       'Barratt_Barratt_P1_Occ', 'Barratt_Barratt_P2_Edu',
       'Barratt_Barratt_P2_Occ', 'Basic_Demos_Enroll_Year_2016',
       'Basic_Demos_Enroll_Year_2017', 'Basic_Demos_Enroll_Year_2018',
       'Basic_Demos_Enroll_Year_2019', 'Basic_Demos_Enroll_Year_2020',
       'Basic_Demos_Study_Site_2', 'Basic_Demos_Study_Site_3',
       'Basic_Demos_Study_Site_4', 'EHQ_EHQ_Total', 'ColorVision_CV_Score',
       'APQ_P_APQ_P_CP', 'APQ_P_APQ_P_ID', 'APQ_P_APQ_P_INV',
       'APQ_P_APQ_P_OPD', 'APQ_P_APQ_P_PM', 'APQ_P_APQ_P_PP',
       'SDQ_SDQ_Conduct_Problems', 'SDQ_SDQ_Difficulties_Total',
       'SDQ_SDQ_Emotional_Problems', 'SDQ_SDQ_Externalizing',
       'SDQ_SDQ_Generating_Impact', 'SDQ_SDQ_Hyperactivity',
       'SDQ_SDQ_Internalizing', 'SDQ_SDQ_Peer_Problems', 'SDQ_SDQ_Prosocial',
       'MR

# MLP layer for metadata

In [7]:
class MetadataBranch(nn.Module):
    def __init__(self, input_dim=33, hidden_dims=[64, 32], dropout_rate=0.3):
        super(MetadataBranch, self).__init__()
        
        # First dense layer: 33 -> 64
        self.fc1 = nn.Linear(input_dim, hidden_dims[0])
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_rate)
        
        # Second dense layer: 64 -> 32
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout_rate)
        
        # Output: metadata latent vector (size 32 here)
        self.output_dim = hidden_dims[-1]
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        
        return x


# Edge-to-Edge layer

In [8]:
import torch
import torch.nn as nn

class E2EBlock(nn.Module):
    def __init__(self, in_planes, planes, kernel_size1, kernel_size2, bias=False):
        super(E2EBlock, self).__init__()
        self.cnn1 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size1, bias=bias)
        self.cnn2 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size2, bias=bias)
        
    def forward(self, x):
        a = self.cnn1(x)  # (batch, planes, height, 1)
        b = self.cnn2(x)  # (batch, planes, 1, width)
        
        # Match the dimensions by repeating
        a = a.repeat(1, 1, 1, b.shape[3])  # repeat along width
        b = b.repeat(1, 1, a.shape[2], 1)  # repeat along height

        return a + b


In [7]:


class E2EBlock(nn.Module):
    def __init__(self, in_planes, planes, kernel_size1, kernel_size2, bias=False):
        """
        Flexible E2EBlock to allow different kernel sizes.
        
        Args:
        - in_planes: input channels (1 for first layer, 32 for second)
        - planes: output channels
        - kernel_size1: kernel size for first conv (e.g., (1, d) or (1, 1))
        - kernel_size2: kernel size for second conv (e.g., (d, 1) or (1, 1))
        """
        super(E2EBlock, self).__init__()
        self.cnn1 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size1, bias=bias)
        self.cnn2 = nn.Conv2d(in_planes, planes, kernel_size=kernel_size2, bias=bias)
        
    def forward(self, x):
        a = self.cnn1(x)  # conv1 output
        b = self.cnn2(x)  # conv2 output

        # Expand spatial dimensions to match before summing
        a = a.expand(-1, -1, b.shape[2], b.shape[3])
        b = b.expand(-1, -1, a.shape[2], a.shape[3])

        return a + b



# BrainNetCNN Core

(input/output layer table goes here)

# Combined model

In [9]:
class BrainNetCNNWithMetadata(nn.Module):
    def __init__(self, example, metadata_dim, num_classes=2):
        super(BrainNetCNNWithMetadata, self).__init__()
        d = example.size(3)  # connectome dimension (dxd)

        # Brain branch
        self.e2econv1 = E2EBlock(1, 32, kernel_size1=(1, d), kernel_size2=(d, 1))
        self.e2econv2 = E2EBlock(32, 64, kernel_size1=(1, 1), kernel_size2=(1, 1))

        self.e2n = nn.Conv2d(64, 1, kernel_size=(1,d))
        self.n2g = nn.Conv2d(1, 256, kernel_size=(d, 1))

        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 30)

        # Metadata branch
        self.metadata_fc1 = nn.Linear(metadata_dim, 64)
        self.metadata_fc2 = nn.Linear(64, 30)

        # Combined final layer
        self.fc_combined = nn.Linear(30 + 30, num_classes)

        # Common stuff
        self.dropout = nn.Dropout(p=0.5)
        self.act = nn.LeakyReLU(negative_slope=0.33)

    def forward(self, brain_x, metadata_x):
        # Brain branch
        x = self.act(self.e2econv1(brain_x)) 
        x = self.act(self.e2econv2(x))
        x = self.act(self.e2n(x))
        x = self.dropout(self.act(self.n2g(x)))

        x = x.view(x.size(0), -1)  
        x = self.dropout(self.act(self.fc1(x)))
        x = self.dropout(self.act(self.fc2(x)))  # Shape: (batch, 30)

        # Metadata branch
        m = self.dropout(self.act(self.metadata_fc1(metadata_x)))
        m = self.dropout(self.act(self.metadata_fc2(m)))             # Shape: (batch, 30)

        # Combine
        combined = torch.cat([x, m], dim=1)  # Shape: (batch, 60)

        # Final prediction
        out = self.fc_combined(combined)     # Shape: (batch, num_classes)

        return out


Loss function: Binary Cross-entropy with logits

In [10]:
loss_fn = torch.nn.BCEWithLogitsLoss()

# Load the data

In [11]:
class BrainDataset(Dataset):
    def __init__(self, connectomes_np, metadata_df, targets_df):
        """
        connectomes_np: numpy array of shape (N_samples, nodes, nodes)
        metadata_df: pandas DataFrame of shape (N_samples, num_meta_features)
        targets_df: pandas DataFrame of shape (N_samples, 2)
        """
        self.connectomes = torch.from_numpy(connectomes_np).float()
        self.metadata = torch.from_numpy(metadata_df.to_numpy()).float()
        self.targets = torch.from_numpy(targets_df).float()
        
        # Add channel dimension if missing
        if self.connectomes.ndim == 3:
            self.connectomes = self.connectomes.unsqueeze(1)  # (N_samples, 1, nodes, nodes)

    def __len__(self):
        return len(self.connectomes)

    def __getitem__(self, idx):
        connectome = self.connectomes[idx]      # Tensor (1, nodes, nodes)
        metadata = self.metadata[idx]            # Tensor (num_meta_features,)
        target = self.targets[idx]               # Tensor (2,)
        return connectome, metadata, target


Convert target_train to an array

In [12]:
targets_train = pd.read_excel(f"{train_path}/TRAINING_SOLUTIONS.xlsx")
targets_train = targets_train[["ADHD_Outcome","Sex_F"]] # drop participant ID
targets_train1 = targets_train.to_numpy()
targets_train1

array([[1, 1],
       [1, 0],
       [1, 0],
       ...,
       [0, 1],
       [0, 0],
       [0, 0]])

In [13]:
# Create Dataset
dataset = BrainDataset(connectivity_matrices, train_metadata, targets_train1)

# Create DataLoader
from torch.utils.data import DataLoader

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# In your training loop:
# for connectome_batch, metadata_batch, target_batch in dataloader:
    # connectome_batch -> (32, 1, 82, 82)
    # metadata_batch -> (32, 33)
    # target_batch -> (32, 2)
    # ...


# Train loop

In [14]:
brain_x, metadata_x, targets = next(iter(dataloader))  # get one batch
brain_x
model = BrainNetCNNWithMetadata(brain_x, metadata_dim=33, num_classes=2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.BCEWithLogitsLoss()
num_epochs = 15
for epoch in range(num_epochs):
    model.train()
    
    for brain_x, metadata_x, targets in dataloader:
        optimizer.zero_grad()
        
        outputs = model(brain_x, metadata_x)  # (batch_size, 2)
        
        loss = loss_fn(outputs, targets.float())  # IMPORTANT: targets must be float (not long)
        
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch} - Loss: {loss.item():.4f}")


: 

Turn outputs (raw logits) into probabilities

In [None]:
outputs = model(brain_x, metadata_x)
probs = torch.sigmoid(outputs)  # (batch_size, 2)

adhd_probs = probs[:, 0]  # probability of ADHD
sex_probs = probs[:, 1]   # probability of Male (or whatever label)


Threshold set @ 0.5

In [None]:
adhd_preds = (adhd_probs > 0.5).int()
sex_preds = (sex_probs > 0.5).int()

In [7]:
# --- 1. Functional connectome matrices (your graphs) ---
# List or array of shape [n_subjects, n_nodes, n_nodes]
train_connectomes =  connectivity_matrices # shape (n_subjects, n_nodes, n_nodes)
test_connectomes = connectivity_matrices_test   # same for test

# --- 2. Target Variables (binary) ---
# ADHD diagnosis labels: 0 = no ADHD, 1 = ADHD
adhd_labels = targets_train['ADHD_Outcome']
sex_labels = targets_train['Sex_F'] # shape (n_subjects,)

# --- 4. Participant IDs (optional, for submission file) ---
# This is just the list of unique participant IDs, e.g., "v1nMpCoLGU0V"
# test_participant_ids = np.load('path_to_your_test_participant_ids.npy')  # shape (n_test_subjects,)

all_columns = list(set(train_metadata.columns) | set(test_metadata.columns))
print(len(test_metadata.columns))
# --- 5. Normalize Metadata ---
# Always normalize metadata before feeding into model
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
train_metadata = scaler.fit_transform(train_metadata)
test_metadata = scaler.transform(test_metadata)


# --- 6. Create Datasets and DataLoaders ---
train_dataset = ConnectomeDataset(train_connectomes, adhd_labels, sex_labels, train_metadata)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# test_dataset = ConnectomeDataset(test_connectomes, np.zeros(len(test_connectomes)), np.zeros(len(test_connectomes)), test_metadata)
# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


NameError: name 'connectivity_matrices' is not defined

In [None]:
def matrix_to_edges(matrix, threshold=0.0):
    edge_index = []
    edge_weight = []
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            if matrix[i, j] > threshold and i != j:
                edge_index.append([i, j])
                edge_weight.append(matrix[i, j])
    edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    edge_weight = torch.tensor(edge_weight, dtype=torch.float)
    return edge_index, edge_weight

def compute_node_features(matrix):
    degree = matrix.sum(axis=1)
    node_features = torch.tensor(degree, dtype=torch.float).unsqueeze(1)  # [n_nodes, 1]
    return node_features


In [None]:
from torch_geometric.data import Data, Dataset

class ConnectomeDataset(Dataset):
    def __init__(self, connectomes, adhd_labels, sex_labels, metadata, threshold=0.0):
        super().__init__()
        self.connectomes = connectomes
        self.adhd_labels = adhd_labels
        self.sex_labels = sex_labels
        self.metadata = metadata
        self.threshold = threshold

    def len(self):
        return len(self.connectomes)

    def get(self, idx):
        matrix = self.connectomes[idx]
        edge_index, edge_weight = matrix_to_edges(matrix, threshold=self.threshold)

        x = compute_node_features(matrix)  # now using degree features

        y_adhd = torch.tensor(self.adhd_labels[idx], dtype=torch.long)
        y_sex = torch.tensor(self.sex_labels[idx], dtype=torch.long)

        meta = torch.tensor(self.metadata[idx], dtype=torch.float)

        data = Data(x=x, edge_index=edge_index, edge_attr=edge_weight)
        data.y_adhd = y_adhd
        data.y_sex = y_sex
        data.metadata = meta
        data.num_nodes = matrix.shape[0]
        return data


In [None]:
# import torch
# import torch.nn.functional as F
# from torch_geometric.nn import GCNConv, global_mean_pool

# class GNNWithMetadata(torch.nn.Module):
#     def __init__(self, hidden_channels, metadata_dim):
#         super().__init__()
#         self.conv1 = GCNConv(1, hidden_channels)  # input dim is 1 (degree feature)
#         self.conv2 = GCNConv(hidden_channels, hidden_channels)

#         self.meta_mlp = torch.nn.Sequential(
#             torch.nn.Linear(metadata_dim),
#             torch.nn.ReLU(),
#             torch.nn.Linear(1, hidden_channels)
#         )

#         self.lin_adhd = torch.nn.Linear(hidden_channels + 32, 2)
#         self.lin_sex = torch.nn.Linear(hidden_channels + 32, 2)

#     def forward(self, x, edge_index, batch, metadata):
#         x = self.conv1(x, edge_index)
#         x = F.relu(x)
#         x = self.conv2(x, edge_index)
#         x = F.relu(x)

#         x = global_mean_pool(x, batch)

#         meta_embedding = self.meta_mlp(metadata)

#         fused = torch.cat([x, meta_embedding], dim=1)

#         adhd_out = self.lin_adhd(fused)
#         sex_out = self.lin_sex(fused)

#         return adhd_out, sex_out


In [None]:
class GNNWithMetadata(torch.nn.Module):
    def __init__(self, hidden_channels, metadata_dim):
        super().__init__()
        self.conv1 = GCNConv(1, hidden_channels)  # input dim is 1 (degree feature)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)

        self.meta_mlp = torch.nn.Sequential(
            torch.nn.Linear(metadata_dim, hidden_channels),  # Fix metadata input size
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_channels, hidden_channels)  # Hidden size matches GCN output
        )

        self.lin_adhd = torch.nn.Linear(hidden_channels + hidden_channels, 2)
        self.lin_sex = torch.nn.Linear(hidden_channels + hidden_channels, 2)

    def forward(self, x, edge_index, batch, metadata):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        x = F.relu(x)

        x = global_mean_pool(x, batch)  # Global pooling of node features

        metadata = metadata.view(metadata.size(0), -1)  # Flatten if needed
        meta_embedding = self.meta_mlp(metadata)

        fused = torch.cat([x, meta_embedding], dim=1)

        adhd_out = self.lin_adhd(fused)
        sex_out = self.lin_sex(fused)

        return adhd_out, sex_out


In [None]:
def train(model, loader, optimizer, device):
    model.train()
    total_loss = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()

        adhd_out, sex_out = model(data.x, data.edge_index, data.batch, data.metadata)

        loss_adhd = F.cross_entropy(adhd_out, data.y_adhd)
        loss_sex = F.cross_entropy(sex_out, data.y_sex)

        loss = loss_adhd + loss_sex
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(loader)


In [None]:
def predict(model, loader, device):
    model.eval()
    adhd_preds = []
    sex_preds = []
    participant_ids = []

    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            adhd_out, sex_out = model(data.x, data.edge_index, data.batch, data.metadata)

            adhd_probs = F.softmax(adhd_out, dim=1)[:, 1]
            sex_probs = F.softmax(sex_out, dim=1)[:, 1]

            adhd_preds.append(adhd_probs.cpu())
            sex_preds.append(sex_probs.cpu())
            participant_ids.extend(data.participant_id)  # if available

    adhd_preds = torch.cat(adhd_preds).numpy()
    sex_preds = torch.cat(sex_preds).numpy()

    return participant_ids, adhd_preds, sex_preds


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

metadata_dim = train_metadata.shape[1]

model = GNNWithMetadata(hidden_channels=128, metadata_dim=metadata_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

train_dataset = ConnectomeDataset(train_connectomes, adhd_labels, sex_labels, train_metadata)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for epoch in range(1, 51):
    loss = train(model, train_loader, optimizer, device)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1056x1 and 33x128)

In [None]:
train_metadata.shape[1]

33

In [None]:
import torch
import torch.nn.functional as F

def train(model, loader, optimizer, device):
    model.train()  # Set the model to training mode
    total_loss = 0

    for data in loader:
        data = data.to(device)  # Move data to the appropriate device
        optimizer.zero_grad()  # Clear previous gradients

        # Forward pass: compute predicted outputs by passing inputs to the model
        adhd_out, sex_out = model(data.x, data.edge_index, data.batch, data.metadata)

        # Compute the loss
        adhd_loss = F.cross_entropy(adhd_out, data.y[:, 0])  # ADHD label is the first column of y
        sex_loss = F.cross_entropy(sex_out, data.y[:, 1])    # Sex label is the second column of y
        loss = adhd_loss + sex_loss  # Combine both losses

        # Backward pass: compute gradients
        loss.backward()

        # Update model parameters
        optimizer.step()

        total_loss += loss.item()  # Accumulate the loss

    return total_loss / len(loader)  # Return the average loss for this epoch


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model and optimizer
model = GNNWithMetadata().to(device)  # Removed the hidden_channels parameter
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# Assuming 'train_connectomes', 'adhd_labels', 'sex_labels', 'train_metadata' are pre-defined
train_dataset = ConnectomeDataset(train_connectomes, adhd_labels, sex_labels, train_metadata)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Train the model for 50 epochs
for epoch in range(1, 51):
    loss = train(model, train_loader, optimizer, device)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1056 and 32x16)

In [None]:
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

def compute_weighted_f1(y_true_adhd, y_pred_adhd, y_true_sex, y_pred_sex, sex_true):
    # Normal F1 scores
    _, _, f1_adhd, _ = precision_recall_fscore_support(y_true_adhd, y_pred_adhd, average='binary')
    _, _, f1_sex, _ = precision_recall_fscore_support(y_true_sex, y_pred_sex, average='binary')

    # Now handle special weighting: 2x weight for Female ADHD cases
    special_cases = (y_true_adhd == 1) & (sex_true == 1)
    if special_cases.sum() > 0:
        # Compute precision, recall, f1 only on female ADHD cases
        special_precision, special_recall, special_f1, _ = precision_recall_fscore_support(
            y_true_adhd[special_cases], y_pred_adhd[special_cases], average='binary'
        )
        # Boost f1_adhd by averaging
        f1_adhd = (f1_adhd + special_f1) / 2

    final_score = (f1_adhd + f1_sex) / 2

    return final_score


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

metadata_dim = train_metadata.shape[1]

model = GNNWithMetadata(hidden_channels=128, metadata_dim=metadata_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# train_dataset = ConnectomeDataset(train_connectomes, train_adhd_labels, train_sex_labels, train_metadata)
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for epoch in range(1, 51):
    loss = train(model, train_loader, optimizer, device)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

In [None]:

# Assuming connectivity_matrices is a numpy array with shape (N, 192, 192)
connectome_data_tensor = torch.tensor(connectivity_matrices, dtype=torch.float32).unsqueeze(1)  # Add channel dimension

# Ensure the tabular data is also in tensor format
train_X_tabular_tensor = torch.tensor(train_metadata.values, dtype=torch.float32)

# Assuming Y_train_tensor is the target tensor (ADHD and Sex)
Y_train_tensor = torch.tensor(targets_train.values, dtype=torch.float32)

# Now create the TensorDataset
train_dataset = TensorDataset(connectome_data_tensor, train_X_tabular_tensor, Y_train_tensor)

# Create the DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


class CCNN(nn.Module):
    def __init__(self, num_tabular_features, num_classes=2):
        super(CCNN, self).__init__()
        
        # 2D Convolutional layers for the connectome data (200x200 images)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)  # Output: (32, 200, 200)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)       # Output: (32, 100, 100)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1) # Output: (64, 100, 100)
        
        # Fully connected layers for the tabular data
        self.fc1 = nn.Linear(num_tabular_features, 128)
        
        # Final fully connected layer to output predictions for ADHD and Sex
        # Adjusting the input size after flattening the output of convolution layers
        self.fc2 = nn.Linear(64 * 50 * 50 + 128, num_classes)  # Flattened output size: 64 * 50 * 50 = 160000

    def forward(self, x_connectome, x_tabular):
        # Apply convolutional layers on the connectome data
        x = self.pool(F.relu(self.conv1(x_connectome)))  # Output: (32, 100, 100)
        x = self.pool(F.relu(self.conv2(x)))             # Output: (64, 50, 50)
        
        # Flatten the 2D output of the convolutional layers
        x_connectome_flat = x.view(x.size(0), -1)  # Flatten to (N, 64*50*50)
        
        # Pass tabular data through the fully connected layer
        x_tabular = F.relu(self.fc1(x_tabular))
        
        # Concatenate the flattened connectome data with the tabular data
        x_combined = torch.cat((x_connectome_flat, x_tabular), dim=1)
        
        # Final fully connected layer for classification
        x = self.fc2(x_combined)
        return x


# Instantiate the model
model = CCNN(num_tabular_features=train_X_tabular.shape[1])

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10  # You can adjust this
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    
    for batch_idx, (connectome_data, tabular_data, labels) in enumerate(train_loader):
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(connectome_data, tabular_data)
        
        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()
        
        # Update weights
        optimizer.step()
        
        # Track statistics
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total_preds += labels.size(0)
        correct_preds += (predicted == labels).sum().item()
    
    # Print statistics
    # print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {correct_preds/total_preds:.4f}')


AttributeError: 'numpy.ndarray' object has no attribute 'values'