In [None]:
import os
import pickle
import numpy as np
import pandas as pd

# Predictors 
from debiased.predictors.decision_tree import DTClassifier
from debiased.predictors.logistic_regression import LogisticRegressionClassifier
from debiased.predictors.oulad_smotenn import SmoteENNRFBoostClassifier
from debiased.predictors.portugal_garf import GARFClassifier
from debiased.predictors.xuetangx_svc import StandardScalingSVCClassifier

# Pre-Processing
from debiased.mitigation.preprocessing.alabdulmohsin import AlabdulmohsinPreProcessor
from debiased.mitigation.preprocessing.calders import CaldersPreProcessor
from debiased.mitigation.preprocessing.chakraborty import ChakrabortyPreProcessor
from debiased.mitigation.preprocessing.cock import CockPreProcessor
from debiased.mitigation.preprocessing.dablain import DablainPreProcessor
from debiased.mitigation.preprocessing.iosifidis_resampledattribute import IosifidisResamplingAttributePreProcessor
from debiased.mitigation.preprocessing.iosifidis_resampletarget import IosifidisResamplingTargetPreProcessor
from debiased.mitigation.preprocessing.iosifidis_smoteattribute import IosifidisSmoteAttributePreProcessor
from debiased.mitigation.preprocessing.iosifidis_smotetarget import IosifidisSmoteTargetPreProcessor
from debiased.mitigation.preprocessing.lahoti import LahotiPreProcessor
from debiased.mitigation.preprocessing.li import LiPreProcessor
from debiased.mitigation.preprocessing.zelaya_over import ZelayaOverPreProcessor
from debiased.mitigation.preprocessing.zelaya_smote import ZelayaSMOTEPreProcessor
from debiased.mitigation.preprocessing.zelaya_over import ZelayaOverPreProcessor
from debiased.mitigation.preprocessing.zemel import ZemelPreProcessor

# In-Processing
from debiased.mitigation.inprocessing.chakraborty_in import ChakrabortyInProcessor
from debiased.mitigation.inprocessing.chen import ChenInProcessor
from debiased.mitigation.inprocessing.gao import GaoInProcessor
from debiased.mitigation.inprocessing.grari2 import Grari2InProcessor
from debiased.mitigation.inprocessing.islam import IslamInProcessor
from debiased.mitigation.inprocessing.kilbertus import KilbertusInProcessor
from debiased.mitigation.inprocessing.liu import LiuInProcessor
from debiased.mitigation.inprocessing.zafar import ZafarInProcessor

# Post Processing
from debiased.mitigation.postprocessing.kamiranpost import KamiranPostProcessor
from debiased.mitigation.postprocessing.pleiss import PleissPostProcessor
from debiased.mitigation.postprocessing.snel import SnelPostProcessor

# Scorer
from debiased.crossvalidation.scorers.binary_scorer import BinaryClfScorer
from debiased.crossvalidation.scorers.fairness_binary_scorer import BinaryFairnessScorer

2025-07-19 23:17:22.472586: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
seaborn not found, pip install seaborn to use plots functions
  warn_deprecated('vmap', 'torch.vmap')


# Data
Here is a script to transform the datasets we gave you into features, targets, and demographic lists. 
To load your own dataset, skip the following cell, and move on to the next one! 

In [2]:
# Read data
dataset_path = '../data/student-performance-math/data_dictionary.pkl'
with open(dataset_path, 'rb') as fp:
    dataset = pickle.load(fp)

# Format dataset
lids = [lid for lid in dataset['data']]
features = [
    dataset['data'][lid]['features'] for lid in lids
]
labels = [
    dataset['data'][lid]['binary_label'] for lid in lids
]
demographics = [
    {'sex': dataset['data'][lid]['sex'], 'famsize': dataset['data'][lid]['famsize']} for lid in lids
]

In [3]:
mitigating_attributes = 'sex.famsize'
mitigating_scores = ['sex', 'famsize']
discriminated = '_0_1._0_0'

To integrate your own dataset, create one list for the:
- features (one list per student)
- labels (one integer per student)
- demographics (one dictionary with all demographic attributes as values, and the demographic as key per student)

In [None]:
dataset_path = ''
dataset = # read function
features = [
    # one list per student
]
labels = [
    # one integer/float per student
]
demographics = [
    # one dictionary per student
]


# Predictors

**We have already implemented a few base predictors which were implemented for open-source educationnal datasets that you can use:**
- ```DTClassifier```
    - decision tree
- ```LogisticRegressionClassifier```
    - eedi
    - logistic regression
- ```SmoteENNRFBoostClassifier```
    - oulad
    - smote to rebalance the classes + RF
- ```GARFClassifier```
    - portuguese datasets
    - genetic algorithm for best set + RF
- ```StandardScalingSVCClassifier```
    - xuetangx
    - scaling followed by a classifier


  
**You can call the following functions on those 5 classifiers:**
- ```fit(x_train, y_train, xval (can be an empty list), yval (can be an empty list))```: trains the classifier
- ```predict(features)```: returns the predicted class

- ```predict_proba(features)```: returns the probability of the instance belonging to each class according to the classifier

## Train predictors

In [4]:
clf_dt = DTClassifier(max_depth=5)
clf_dt.fit(features[:275], labels[:275])

clf_lr = LogisticRegressionClassifier(penalty='l2', C=100, solver='liblinear')
clf_lr.fit(features[:275], labels[:275])

clf_smoterf = SmoteENNRFBoostClassifier(n_estimators=500, max_depth=15)
clf_smoterf.fit(features[:275], labels[:275])

clf_garf = GARFClassifier()
clf_garf.fit(features[:275], labels[:275])

clf_svc = StandardScalingSVCClassifier(kernel='linear', C=0.001)
clf_svc.fit(features[:275], labels[:275])



In [5]:
dt_pred = clf_dt.predict(features[275:])
lr_pred = clf_lr.predict(features[275:])
smoterf_pred = clf_smoterf.predict(features[275:])
garf_pred = clf_garf.predict(features[275:])
svc_pred = clf_svc.predict(features[275:])

dt_proba = clf_dt.predict_proba(features[275:])
lr_proba = clf_lr.predict_proba(features[275:])
smoterf_proba = clf_smoterf.predict_proba(features[275:])
garf_proba = clf_garf.predict_proba(features[275:])
svc_proba = clf_svc.predict_proba(features[275:])

You can add your own classifier here! 

For compatibility with: 
- *pre-processing* methods, your classifier needs to:
    - be able to receive the features and ground truths as input (as the usual convention)
- *in-processing* methods, your classifier needs to:
    - the in-processing predictor will fully replace your predictor
- *post-processing* methods, your classifier needs to:
    - output the raw predictions (probabilities) for each instance of the test set
    - output the predicted class for each instance of the test set (can be inferred from the raw predictions)
    - output the demographic attribute of each instance (this should normally be possible from the data, without passing through the predictor, unless your predictor re-arranges the order of its predictions)
    - the ground truth for each instance (this should normally be possible from the data, without passing through the predictor, unless your predictor re-arranges the order of its predictions)

In [None]:
your_predictor = xxx
your_predictor.fit(...)

## Scores

In [6]:
fairness = BinaryFairnessScorer(mitigating_scores, discriminated, np.unique(labels))
scores = fairness.get_fairness_scores(labels[275:], svc_pred, svc_proba, demographics[275:])
print('scores per group for the SVC:', scores['sex']['roc'])

scores per group for the SVC: {0: 0.6465028355387523, 1: 0.5846267553584626, 'all': 0.6118861607142857}


# Pre-processing

In [7]:
pre_processor = AlabdulmohsinPreProcessor(mitigating_attributes, discriminated)
new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

pre_processor = CaldersPreProcessor(mitigating_attributes, discriminated)
new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ChakrabortyPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = CockPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZelayaOverPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = DablainPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZelayaOverPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = IosifidisResamplingAttributePreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZelayaOverPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = IosifidisResamplingTargetPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = IosifidisSmoteAttributePreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = IosifidisSmoteTargetPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = LahotiPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = IosifidisSmoteTargetPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = LahotiPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = LiPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZelayaOverPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZelayaSMOTEPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# pre_processor = ZemelPreProcessor(mitigating_attributes, discriminated)
# new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

ADMM Iteration 0: 	  0050100%
primal residual:  1.5837910935312212
dual residual:  17.287620977652185 

395 instances were massaged! (100.0%)


## Scores

In [8]:
pre_processor = CaldersPreProcessor(mitigating_attributes, discriminated)
new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# clf_svc = StandardScalingSVCClassifier(kernel='linear', C=0.001)
clf_svc = GARFClassifier()
_ = clf_svc.fit(new_x[:275], new_y[:275])
pre_svc_pred = clf_svc.predict(features[275:])
pre_svc_proba = clf_svc.predict_proba(features[275:])

fairness = BinaryFairnessScorer(mitigating_scores, discriminated, np.unique(labels))
pre_scores = fairness.get_fairness_scores(labels[275:], pre_svc_pred, pre_svc_proba, demographics[275:])
print('scores per group for the SVC:', scores['sex']['roc'])
print('scores per group for the preprocessed with Calders SVC:', pre_scores['sex']['roc'])

pre_processor = AlabdulmohsinPreProcessor(mitigating_attributes, discriminated)
new_x, new_y, new_demo = pre_processor.fit_transform(features, labels, demographics)

# clf_svc = StandardScalingSVCClassifier(kernel='linear', C=0.001)
clf_svc = GARFClassifier()
_ = clf_svc.fit(new_x[:275], new_y[:275])
pre_svc_pred = clf_svc.predict(features[275:])
pre_svc_proba = clf_svc.predict_proba(features[275:])

fairness = BinaryFairnessScorer(mitigating_scores, discriminated, np.unique(labels))
prealabd_scores = fairness.get_fairness_scores(labels[275:], pre_svc_pred, pre_svc_proba, demographics[275:])
print('scores per group for the pre-processed with Alabdulmohsin SVC:', prealabd_scores['sex']['roc'])

scores per group for the SVC: {0: 0.6465028355387523, 1: 0.5846267553584626, 'all': 0.6118861607142857}
scores per group for the preprocessed with Calders SVC: {0: 0.717391304347826, 1: 0.6921655580192165, 'all': 0.7004743303571429}
ADMM Iteration 0: 	  0050100%
primal residual:  1.5703455127470975
dual residual:  17.28701487350002 

395 instances were massaged! (100.0%)
scores per group for the pre-processed with Alabdulmohsin SVC: {0: 0.6370510396975425, 1: 0.5809312638580931, 'all': 0.6018415178571429}


# In processing

In [9]:
in_processor = ChakrabortyInProcessor(mitigating_attributes, discriminated)
in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = ChenInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = GaoInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = Grari2InProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = IslamInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = KilbertusInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = LiuInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])

# in_processor = ZafarInProcessor(mitigating_attributes, discriminated)
# in_processor.fit(features[:275], labels[:275], demographics[:275])



In [10]:
in_pred, _ = in_processor.predict(features[275:], labels[275:], demographics[275:])
in_proba = in_processor.predict_proba(features[275:], demographics[275:])

fairness = BinaryFairnessScorer(mitigating_scores, discriminated, np.unique(labels))
in_scores = fairness.get_fairness_scores(labels[275:], in_pred, in_proba, demographics[275:])
print('scores per group for the SVC:', scores['sex']['roc'])
print('scores per group for the in-processed with Chakraborty:', pre_scores['sex']['roc'])


scores per group for the SVC: {0: 0.6465028355387523, 1: 0.5846267553584626, 'all': 0.6118861607142857}
scores per group for the in-processed with Chakraborty: {0: 0.717391304347826, 1: 0.6921655580192165, 'all': 0.7004743303571429}


# Post-Processing

In [11]:
# clf_svc = StandardScalingSVCClassifier(kernel='linear', C=0.001)
clf_svc = GARFClassifier()
_ = clf_svc.fit(new_x[:275], new_y[:275])
pre_svc_pred = clf_svc.predict(features[275:])
pre_svc_proba = clf_svc.predict_proba(features[275:])
post_processor = KamiranPostProcessor(mitigating_attributes, discriminated)
post_predictions, post_probas = post_processor.fit_transform(
    clf_dt, features[275:], labels[275:], pre_svc_pred, pre_svc_proba, demographics[275:]
)


fairness = BinaryFairnessScorer(mitigating_scores, discriminated, np.unique(labels))
pre_scores = fairness.get_fairness_scores(labels[275:], post_predictions, post_probas, demographics[275:])
print('scores per group for the SVC:', scores['sex']['roc'])
print('scores per group for the postprocessed from SVC to kamiran:', pre_scores['sex']['roc'])

scores per group for the SVC: {0: 0.6465028355387523, 1: 0.5846267553584626, 'all': 0.6118861607142857}
scores per group for the postprocessed from SVC to kamiran: {0: 0.553875236294896, 1: 0.5384331116038432, 'all': 0.5491071428571429}


In [None]:
# post_processor = KamiranPostProcessor(mitigating_attributes, discriminated)
# post_predictions, post_probas = post_processor.fit_transform(
#     clf_dt, features[275:], labels[275:], dt_pred, dt_probas, demographics[275:]
# )

# post_processor = PleissPostProcessor(mitigating_attributes, discriminated)
# post_predictions, post_probas = post_processor.fit_transform(
#     #clf_dt, features[275:], labels[275:], dt_pred, dt_probas, demographics[275:]
# )

# post_processor = SnelPostProcessor(mitigating_attributes, discriminated)
# post_predictions, post_probas = post_processor.fit_transform(
#     clf_dt, features[275:], labels[275:], dt_pred, dt_probas, demographics[275:]
# )