In [1]:
import pandas as pd
from src.causal_graph import *
from src.scm.feedforward_ncm import FF_NCM
from src.scm.distribution import *
from torch.utils.data import DataLoader
from torch.utils.data._utils.collate import default_collate
from src.data import NCMDataset,tt_split

from src.training.train import train_ncm, print_accuracy

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.discriminant_analysis import StandardScaler
from sklearn.model_selection import train_test_split

# Load the exam dataset (path is relative to the notebook's working
# directory) and preview its first ten rows as the cell output.
df = pd.read_csv(filepath_or_buffer='data/df_exam.csv')
df.head(n=10)

Unnamed: 0,Gender,EthnicGroup,ParentEduc,LunchType,TestPrep,ParentMaritalStatus,PracticeSport,IsFirstChild,NrSiblings,TransportMeans,WklyStudyHours,MathScore,ReadingScore,WritingScore
0,1,1,3,1,0,0,1,1,4.0,0,0,87,93,91
1,0,2,1,1,0,3,1,1,0.0,0,1,76,78,75
2,1,1,1,1,0,3,2,1,1.0,0,1,73,84,79
3,1,1,1,1,1,2,0,0,1.0,1,1,85,93,89
4,0,1,1,0,0,3,1,1,1.0,1,2,41,43,39
5,0,3,0,0,1,0,1,0,3.0,1,2,65,64,68
6,0,3,1,1,0,1,1,1,1.0,0,1,40,52,43
7,1,1,0,1,0,3,2,0,1.0,1,1,66,82,74
8,0,0,1,1,1,0,1,1,1.0,1,2,80,73,71
9,1,0,3,1,0,1,1,1,2.0,1,0,48,53,58


# Causal Model

In [2]:
# Show the column labels of the loaded frame; the variable lists defined
# later in the notebook must use these exact names.
df.columns

Index(['Gender', 'EthnicGroup', 'ParentEduc', 'LunchType', 'TestPrep',
       'ParentMaritalStatus', 'PracticeSport', 'IsFirstChild', 'NrSiblings',
       'TransportMeans', 'WklyStudyHours', 'MathScore', 'ReadingScore',
       'WritingScore'],
      dtype='object')

In [3]:
# Column roles for the causal graph. These four groups are later passed, in
# this order, to create_expanded_sfm_graph(X, Z, W, Y).
# NOTE(review): presumably X = protected attribute, Z = confounders,
# W = mediators, Y = outcome — confirm against src.causal_graph.
X = 'Gender'
Y = 'MathScore'

Z = ['EthnicGroup', 'ParentMaritalStatus', 'IsFirstChild', 'NrSiblings', 'TransportMeans']

W = ['ParentEduc', 'LunchType', 'TestPrep', 'PracticeSport', 'WklyStudyHours', 'ReadingScore', 'WritingScore']

# Flat list of every modelled column, ordered X, then Z, then W, then Y.
variables = [X, *Z, *W, Y]

In [4]:
# Columns handed to tt_split as the "binary" set.
# NOTE(review): in the recorded output these columns stay 0/1 while the other
# columns look standardized — presumably tt_split scales only the non-binary
# columns; confirm in src.data.
binary_cols = [
    'Gender',
    'LunchType',
    'TestPrep',
    'IsFirstChild',
]

# Split the full frame into train and test partitions, then preview the
# first five training rows.
train_df, test_df = tt_split(df, binary_cols)
train_df.head(n=5)

Unnamed: 0,Gender,EthnicGroup,ParentEduc,LunchType,TestPrep,ParentMaritalStatus,PracticeSport,IsFirstChild,NrSiblings,TransportMeans,WklyStudyHours,MathScore,ReadingScore,WritingScore
0,1,-1.034705,-0.999477,0,0,0.823963,-0.335256,0,0.600533,-0.849768,0.160755,-1.147853,-0.10917,-0.170846
1,0,1.591101,0.141864,0,0,0.823963,1.166696,1,-1.47692,-0.849768,-1.348116,0.349971,-0.582762,-0.429507
2,0,-1.034705,0.141864,1,1,-0.717546,1.166696,0,0.600533,1.176791,-1.348116,-0.236134,-0.041514,-0.041516
3,1,-0.159436,-0.999477,1,0,-0.717546,1.166696,0,-0.784435,1.176791,0.160755,-0.236134,-0.312138,0.087815
4,1,-1.034705,-0.999477,0,0,0.823963,-0.335256,1,-0.784435,1.176791,0.160755,-3.817888,-3.086034,-2.951453


In [5]:
# Seed PyTorch's global RNG *before* building anything in this cell, so that
# any torch randomness consumed while constructing the datasets/dataloaders or
# the model starts from the same state on every run. (Previously the seed was
# set only after the dataloaders had been created.)
torch.manual_seed(0)

# Wrap the train/test frames for the NCM, restricted to the modelled columns.
train_dataloader = NCMDataset(train_df, variables).get_dataloader()
test_dataloader = NCMDataset(test_df, variables).get_dataloader()

# Build the feed-forward NCM over the graph derived from the X/Z/W/Y roles,
# and optimise all of its parameters with Adam.
ncm = FF_NCM(create_expanded_sfm_graph(X, Z, W, Y))
optimizer = optim.Adam(ncm.parameters(), lr=1e-3)

# Train for a few epochs for quick testing
num_epochs = 3
train_ncm(ncm, train_dataloader, optimizer, 'cpu', num_epochs)

# Report train/test accuracy for the X and Y variables.
print_accuracy(X, ncm, train_dataloader, test_dataloader)
print_accuracy(Y, ncm, train_dataloader, test_dataloader)

Epoch 1/3, Loss: 0.1013
Epoch 2/3, Loss: 0.0724
Epoch 3/3, Loss: 0.0720
Final train accuracy for Gender: 0.8793
Final test accuracy  for Gender: 0.9016
Final train accuracy for MathScore: 0.8273
Final test accuracy  for MathScore: 0.7766


In [6]:
# Report train and test accuracy of the trained model for each variable in Z.
for z in Z:
    print_accuracy(z, ncm, train_dataloader, test_dataloader)

Final train accuracy for EthnicGroup: 0.8808
Final test accuracy  for EthnicGroup: 0.7161
Final train accuracy for ParentMaritalStatus: 0.5338
Final test accuracy  for ParentMaritalStatus: 0.6699
Final train accuracy for IsFirstChild: 0.7760
Final test accuracy  for IsFirstChild: 0.8160
Final train accuracy for NrSiblings: 0.7495
Final test accuracy  for NrSiblings: 0.6750
Final train accuracy for TransportMeans: 0.4647
Final test accuracy  for TransportMeans: 0.3100


In [7]:
# Report train and test accuracy of the trained model for each variable in W.
for w in W:
    print_accuracy(w, ncm, train_dataloader, test_dataloader)

Final train accuracy for ParentEduc: 0.7563
Final test accuracy  for ParentEduc: 0.6410
Final train accuracy for LunchType: 0.7649
Final test accuracy  for LunchType: 0.9106
Final train accuracy for TestPrep: 0.9015
Final test accuracy  for TestPrep: 0.8824
Final train accuracy for PracticeSport: 0.6735
Final test accuracy  for PracticeSport: 0.7359
Final train accuracy for WklyStudyHours: 0.8587
Final test accuracy  for WklyStudyHours: 0.7159
Final train accuracy for ReadingScore: 0.7637
Final test accuracy  for ReadingScore: 0.6131
Final train accuracy for WritingScore: 0.6414
Final test accuracy  for WritingScore: 0.8438
