In [6]:
import pandas as pd
from src.causal_graph import *
from src.scm.feedforward_ncm import FF_NCM
from src.scm.distribution import *
from torch.utils.data import DataLoader
from src.data import NCMDataset

import torch
import torch.nn as nn
import torch.optim as optim

# Read the dataset from a relative path, so 'df_dep.csv' must sit in the
# notebook's working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks
# like a row index written out by an earlier to_csv call — confirm, and
# consider dropping it if nothing downstream relies on it.
df = pd.read_csv('df_dep.csv')
# Last expression of the cell: display the first ten rows.
df.head(10)

Unnamed: 0,Gender,Age,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Sleep Duration,Dietary Habits,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression,degree_level,degree_cluster,degree_emb_pca_1,degree_emb_pca_2,degree_emb_pca_3,degree_emb_pca_4,degree_emb_pca_5
0,0,33,5.0,0.0,8.97,2,1,1.0,1,3,1,0,1,1,4,0.253879,0.243325,0.343663,-0.337468,-0.045405
1,0,31,3.0,0.0,7.03,5,0,1.0,0,9,1,1,0,1,1,0.221702,0.239069,0.015781,0.184283,0.126231
2,0,29,2.0,0.0,5.7,3,0,1.0,0,4,1,0,0,3,2,0.061047,-0.276469,0.533775,-0.294401,0.217425
3,0,30,3.0,0.0,9.54,4,1,1.0,0,1,2,0,0,1,4,0.235426,0.208007,0.092394,0.084074,0.243518
4,1,30,2.0,0.0,8.04,4,0,0.0,0,0,1,1,0,0,0,-0.69218,0.08292,-0.029387,-0.020745,-0.018673
5,0,33,3.0,0.0,7.03,4,0,1.0,1,10,2,1,0,1,4,0.268271,0.238251,-0.039036,0.147155,0.202336
6,1,19,2.0,0.0,8.52,4,0,0.0,0,6,2,1,0,0,0,-0.69218,0.08292,-0.029387,-0.020745,-0.018673
7,0,25,5.0,0.0,6.51,2,0,0.0,1,2,5,1,1,2,2,-0.032905,-0.350849,0.032525,0.216158,0.021605
8,1,20,5.0,0.0,7.25,3,1,1.0,1,10,3,0,1,0,0,-0.69218,0.08292,-0.029387,-0.020745,-0.018673
9,0,19,2.0,0.0,7.83,2,1,0.0,0,6,3,0,0,0,0,-0.69218,0.08292,-0.029387,-0.020745,-0.018673


# Causal Model

In [7]:
# Variable roles, in the argument order later passed to
# create_expanded_sfm(X, Z, W, Y).
# NOTE(review): in the usual Standard Fairness Model reading X is the protected
# attribute, Z the confounders, W the mediators and Y the outcome — TODO confirm
# against src.causal_graph.
X, Y = 'Gender', 'Depression'

Z = ['Age', 'Family History of Mental Illness', 'Sleep Duration']

# W is assembled from thematic groups; the resulting order is significant
# because it determines the column order of `variables` further down.
W = (
    ['Academic Pressure', 'Work Pressure', 'CGPA', 'Study Satisfaction']
    + ['Dietary Habits', 'Have you ever had suicidal thoughts ?']
    + ['Work/Study Hours', 'Financial Stress']
    # Degree-derived columns: level, cluster id and five PCA components.
    + ['degree_level', 'degree_cluster']
    + [
        'degree_emb_pca_1',
        'degree_emb_pca_2',
        'degree_emb_pca_3',
        'degree_emb_pca_4',
        'degree_emb_pca_5',
    ]
)

In [8]:
# Seed before the model is constructed so that its initial weights are the same
# on every Restart & Run All (assumes FF_NCM draws its initial parameters from
# torch's global RNG — TODO confirm). The value matches the seed used in the
# training cell.
torch.manual_seed(0)

# Build the expanded SFM object from the four variable roles and hand it to the
# feed-forward NCM constructor.
standard_fairness_model = create_expanded_sfm(X, Z, W, Y)
ncm = FF_NCM(standard_fairness_model)

In [9]:
def train_ncm(model, dataloader, loss_fns, optimizer, device, num_epochs=10):
    """Fit every structural function of an NCM by minimising summed per-node losses.

    For each batch, exogenous noise is drawn from ``model.pu`` and every node
    ``v`` in ``model.v`` is predicted by ``model.f[v]`` from its observed parent
    values (``model.cg.pa[v]``) and its noise components (``model.cg.v2c2[v]``).
    The per-node losses are added together and optimised in a single step.

    Args:
        model: Module exposing ``pu``, ``v``, ``cg.pa``, ``cg.v2c2`` and ``f``
            as used in the body.
        dataloader: Re-iterable yielding dicts that map variable names to
            tensors. It does not need to implement ``__len__``.
        loss_fns: Mapping from node name to a callable ``loss(pred, target)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    model.to(device)
    model.train()
    for epoch in range(1, num_epochs + 1):
        epoch_loss = 0.0
        # Count batches while iterating instead of calling len(dataloader):
        # that works for loaders without __len__ and avoids dividing by zero
        # when the loader yields nothing.
        num_batches = 0
        for batch in dataloader:
            # Move batch data to device
            batch = {k: v.to(device) for k, v in batch.items()}
            batch_size = next(iter(batch.values())).shape[0]

            # Sample exogenous noise for this batch
            u = model.pu.sample(n=batch_size, device=device)

            # Accumulate loss across all nodes
            total_loss = 0.0
            for v in model.v:
                # Gather observed parent values
                pa_keys = model.cg.pa[v]
                pa_vals = {k: batch[k] for k in pa_keys}
                # Gather exogenous noise components for node v
                u_keys = model.cg.v2c2[v]
                u_vals = {k: u[k] for k in u_keys}

                # Forward pass through the structural function f[v]
                pred_v = model.f[v](pa=pa_vals, u=u_vals)

                # Compute node-level loss
                loss_v = loss_fns[v](pred_v, batch[v])
                total_loss += loss_v

            # Backpropagate and update weights
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            epoch_loss += total_loss.item()
            num_batches += 1

        # An epoch with no batches has no defined mean loss; report NaN.
        avg_loss = epoch_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch}/{num_epochs}, Loss: {avg_loss:.4f}")

    return model

In [None]:
# Column order fed to the dataset: X first, then Z, then W, then Y.
variables = [X, *Z, *W, Y]

dataset = NCMDataset(df, variables)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Binary columns are scored with BCE, everything else with MSE.
# NOTE(review): the logged loss stays at about 760 across all three epochs;
# non-binary columns such as Age go into MSELoss unscaled here — confirm
# whether the targets should be normalised for FF_NCM's output range.
binary_vars = [X, 'Family History of Mental Illness', 'Have you ever had suicidal thoughts ?', Y]
loss_fns = {}
for v in variables:
    loss_fns[v] = nn.BCELoss() if v in binary_vars else nn.MSELoss()

torch.manual_seed(0)
optimizer = optim.Adam(params=ncm.parameters(), lr=1e-3)

# Short run, intended as a quick smoke test rather than a full fit.
num_epochs = 3
trained_ncm = train_ncm(
    model=ncm,
    dataloader=dataloader,
    loss_fns=loss_fns,
    optimizer=optimizer,
    device='cpu',
    num_epochs=num_epochs,
)

# Compute accuracy on Y
def compute_accuracy(model, dataloader, device, target_var):
    """Return the share of samples whose thresholded prediction equals the label.

    For every batch, fresh exogenous noise is drawn from ``model.pu`` and
    ``model.f[target_var]`` is evaluated on the observed parents of
    ``target_var``. Outputs above 0.5 count as class 1.

    Args:
        model: Object exposing ``eval()``, ``pu``, ``cg.pa``, ``cg.v2c2`` and
            ``f`` as used in the body. It is switched to eval mode and left there.
        dataloader: Iterable yielding dicts that map variable names to tensors.
        device: Device every batch is moved to.
        target_var: Name of the node to score.

    Returns:
        ``correct / total`` as a float, or NaN when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            batch_size = next(iter(batch.values())).shape[0]
            u = model.pu.sample(n=batch_size, device=device)
            preds = model.f[target_var](
                pa={k: batch[k] for k in model.cg.pa[target_var]},
                u={k: u[k] for k in model.cg.v2c2[target_var]}
            )
            labels = batch[target_var]
            pred_labels = (preds > 0.5).float()
            # A (B, 1) prediction compared with a (B,) label would broadcast to
            # (B, B) and inflate `correct`; align the shapes when the element
            # counts agree so the comparison stays element-wise.
            if pred_labels.shape != labels.shape and pred_labels.numel() == labels.numel():
                pred_labels = pred_labels.reshape(labels.shape)
            correct += (pred_labels == labels).sum().item()
            total += labels.numel()
    # Accuracy is undefined without samples; report NaN instead of dividing by zero.
    return correct / total if total else float("nan")

# Score the trained model on Y. This reuses `dataloader`, the same loader that
# was passed to train_ncm, so the printed figure is measured on the training
# data rather than on a held-out split.
acc = compute_accuracy(trained_ncm, dataloader, 'cpu', Y)
print(f"Final accuracy for {Y}: {acc:.4f}")


Epoch 1/3, Loss: 759.9215
Epoch 2/3, Loss: 759.6433
Epoch 3/3, Loss: 759.7705
Final accuracy for Depression: 0.8462
