In [1]:
# All imports for this cell live in one block (stdlib, third-party, project-local)
# so the cell's dependencies are visible before any side effect runs.

# Standard library
import os

# Third-party
import kagglehub
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, StandardScaler
from torch.utils.data import Dataset, DataLoader

# Project-local
from src.scm.mlp import MLP

# Download latest version of the dataset. `path` is the local location of the
# dataset files that kagglehub returns; later cells list this directory.
DATASET_HANDLE = "adilshamim8/student-performance-and-learning-style"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /Users/Hanita/.cache/kagglehub/datasets/adilshamim8/student-performance-and-learning-style/versions/1


In [2]:
# Load the first readable CSV from the downloaded dataset directory into df_spls.
# Candidates are sorted so the chosen file does not depend on the arbitrary
# order in which os.listdir() returns directory entries.
csv_filenames = sorted(name for name in os.listdir(path) if name.endswith(".csv"))

for filename in csv_filenames:
    filepath = os.path.join(path, filename)
    try:
        df_spls = pd.read_csv(filepath)
    except Exception as e:
        # Best-effort per file: report the failure and try the next candidate.
        print(f"Error reading {filename}: {e}")
    else:
        print(f"Successfully read {filename} into a pandas DataFrame:")
        break  # Stop after the first CSV file that loads
else:
    # Reached only when the loop never hit `break`: either there were no CSV
    # files at all, or every one of them failed to parse. Fail here instead of
    # letting later cells hit an undefined (or stale) df_spls.
    if csv_filenames:
        raise RuntimeError(
            f"None of the CSV files in {path} could be read: {csv_filenames}"
        )
    raise FileNotFoundError(f"No CSV files found in the specified directory: {path}")

Successfully read student_performance_large_dataset.csv into a pandas DataFrame:


In [3]:
# Quick inspection of the two label columns. They are printed explicitly because
# a notebook only auto-renders a cell's final expression, and this cell ends in
# an assignment, so bare expressions here would be silently discarded.
print(df_spls['Gender'].isna().sum())
print(df_spls['Final_Grade'].value_counts())

# Integer codes for the label columns. The np.nan key is intended to give
# missing values their own code; labels absent from a dict become NaN.
GENDER_CODES = {'Male': 0, 'Female': 1, 'Other': 2, np.nan: 3}
FINAL_GRADE_CODES = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'F': 4, np.nan: 5}

for column, codes in (('Gender', GENDER_CODES), ('Final_Grade', FINAL_GRADE_CODES)):
    # Idempotency guard: once a column has been encoded it is numeric, and
    # mapping it again would turn every code into NaN (no integer is a key of
    # the dict). Skipping numeric columns makes the cell safe to re-run.
    if not pd.api.types.is_numeric_dtype(df_spls[column]):
        df_spls[column] = df_spls[column].map(codes)

# One-hot encode the remaining categorical columns. drop_first=True omits the
# first level of each column, and dtype=int yields 0/1 integers.
df = pd.get_dummies(
    df_spls,
    columns=[
        'Preferred_Learning_Style',
        'Participation_in_Discussions',
        'Use_of_Educational_Tech',
        'Self_Reported_Stress_Level',
    ],
    drop_first=True,
    dtype=int,
)

In [4]:
# Preview the first ten rows of the encoded frame.
df.head(n=10)

Unnamed: 0,Student_ID,Age,Gender,Study_Hours_per_Week,Online_Courses_Completed,Assignment_Completion_Rate (%),Exam_Score (%),Attendance_Rate (%),Time_Spent_on_Social_Media (hours/week),Sleep_Hours_per_Night,Final_Grade,Preferred_Learning_Style_Kinesthetic,Preferred_Learning_Style_Reading/Writing,Preferred_Learning_Style_Visual,Participation_in_Discussions_Yes,Use_of_Educational_Tech_Yes,Self_Reported_Stress_Level_Low,Self_Reported_Stress_Level_Medium
0,S00001,18,1,48,14,100,69,66,9,8,2,1,0,0,1,1,0,0
1,S00002,29,1,30,20,71,40,57,28,8,3,0,1,0,0,1,0,1
2,S00003,20,1,47,11,60,43,79,13,7,3,1,0,0,0,1,1,0
3,S00004,23,1,13,0,63,70,60,24,10,1,0,0,0,1,1,1,0
4,S00005,19,1,24,19,59,63,93,26,8,2,0,0,0,1,1,0,1
5,S00006,28,1,26,5,63,54,80,25,8,3,1,0,0,1,0,0,0
6,S00007,19,1,49,13,91,44,66,30,10,3,0,1,0,1,1,1,0
7,S00008,27,0,14,5,88,56,76,4,6,2,0,1,0,1,1,1,0
8,S00009,22,0,45,16,52,78,70,26,9,1,0,0,1,0,0,1,0
9,S00010,28,2,35,7,100,55,100,5,9,2,0,0,0,0,0,0,1


In [5]:
def _columns_with_prefix(prefix):
    """Return the names of df's columns that start with `prefix`, in frame order."""
    return [name for name in df.columns if name.startswith(prefix)]


# Column groups; Z_cols and W_cols are passed to create_expanded_sfm in a later cell.
X_cols = ['Gender']
Z_cols = ['Age']
Y_cols = ['Final_Grade']

# W_cols mixes plain numeric columns with the one-hot columns generated for each
# categorical variable; the order below is preserved in the resulting list.
W_cols = (
    ['Study_Hours_per_Week']
    + _columns_with_prefix('Preferred_Learning_Style_')
    + ['Online_Courses_Completed']
    + _columns_with_prefix('Participation_in_Discussions')
    + ['Assignment_Completion_Rate (%)', 'Attendance_Rate (%)']
    + _columns_with_prefix('Use_of_Educational_Tech')
    + _columns_with_prefix('Self_Reported_Stress_Level')
    + ['Time_Spent_on_Social_Media (hours/week)', 'Sleep_Hours_per_Night']
)

In [6]:
# NOTE(review): wildcard import; create_expanded_sfm presumably comes from
# src.causal_graph — confirm and consider importing it by name.
from src.causal_graph import *

# Build the graph object from the literal node labels 'X' and 'Y' plus the
# Z_cols and W_cols lists defined in an earlier cell.
# NOTE(review): X_cols and Y_cols are not passed here; confirm how the 'X' and
# 'Y' nodes are tied to the 'Gender' and 'Final_Grade' columns.
sfm = create_expanded_sfm('X',Z_cols,W_cols,'Y')

In [7]:
from src.scm.feedforward_ncm import FF_NCM
# NOTE(review): wildcard import; nothing from src.scm.distribution is referenced
# by name in this cell — confirm which names later cells rely on.
from src.scm.distribution import *

# Construct the model from the graph built in the previous cell.
# NOTE(review): no random seed is set in the visible cells before this point;
# confirm whether the initial weights need to be reproducible.
ncm = FF_NCM(sfm)
# Print ncm.f; the recorded output shows a ModuleDict of MLP modules keyed by
# node name (e.g. Age, X, Use_of_Educational_Tech_Yes).
print(ncm.f)

ModuleDict(
  (Age): MLP(
    (nn): Sequential(
      (0): Linear(in_features=1, out_features=128, bias=True)
      (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (2): ReLU()
      (3): Linear(in_features=128, out_features=128, bias=True)
      (4): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (5): ReLU()
      (6): Linear(in_features=128, out_features=1, bias=True)
      (7): Sigmoid()
    )
  )
  (X): MLP(
    (nn): Sequential(
      (0): Linear(in_features=1, out_features=128, bias=True)
      (1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (2): ReLU()
      (3): Linear(in_features=128, out_features=128, bias=True)
      (4): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (5): ReLU()
      (6): Linear(in_features=128, out_features=1, bias=True)
      (7): Sigmoid()
    )
  )
  (Use_of_Educational_Tech_Yes): MLP(
    (nn): Sequential(
      (0): Linear(in_features=3, out_features=128, bias=True)
      (1): LayerNorm((128,