In [32]:
import sys
print("Chemin de l'exécutable Python :", sys.executable)
print("Version de Python :", sys.version)

Chemin de l'exécutable Python : c:\Users\micha\AppData\Local\Programs\Python\Python311\python.exe
Version de Python : 3.11.1 (tags/v3.11.1:a7a450f, Dec  6 2022, 19:58:39) [MSC v.1934 64 bit (AMD64)]


In [38]:
%matplotlib inline
# Load all necessary packages
# import sys
# sys.path.append("../")
import numpy as np
from tqdm import tqdm

from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german, load_preproc_data_compas
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
from IPython.display import Markdown, display
import matplotlib.pyplot as plt

1. - Preprocess the data
- binarize Age. 
- Split the data into train, validation, test sets
- train a classifier
- Measure the performance of the classifier on the test set. 

In [None]:
import random
random_seed = 42
random.seed(random_seed)  
np.random.seed(random_seed)  
# A -------------------------------- Import dataaset and binarize age ----------------------------------------------

# Import dataset
dataset_orig = AdultDataset()

# Convert to dataframe to visualize it
data, _ = dataset_orig.convert_to_dataframe()

# Copy only to binarize age (Threshold = 40 years)
binarised = data.copy()
binarised['age'] = binarised['age'].apply(lambda x: 0 if x <= 40 else 1) # 1 if greater than 40

# B ----------------------------- Split into train, validation and test --------------------------------------------

# Separate into train (70%) and validation+test (30%)

dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)
# Separate into validation and test

dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)
# C ----------------------------- Train a classifier --------------------------------------------

# Define first the target and our features (Target is income-per-year (1 --> >50 k))

# 1. Normalise training data
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)  # Features
y_train = dataset_orig_train.labels.ravel()  # Target (income-per-year)
w_train = dataset_orig_train.instance_weights.ravel()  

# 2. Train the classifier
the_classifier = LogisticRegression()
the_classifier.fit(X_train, y_train, sample_weight=w_train) 

# 3. Predictions
y_train_pred = the_classifier.predict(X_train)  # Predict labels on training set

# Positive class (Here, >50K)
pos_ind = np.where(the_classifier.classes_ == dataset_orig_train.favorable_label)[0][0] 

dataset_orig_train_pred = dataset_orig_train.copy()
dataset_orig_train_pred.labels = y_train_pred  # Replace labels by the predictions

# D ----------------------------- Performances on the test set --------------------------------------------
dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)

# Normalise as in the training and extract labels of test set
X_test = scale_orig.transform(dataset_orig_test_pred.features)
y_test = dataset_orig_test_pred.labels

# Predict probability for class
dataset_orig_test_pred.scores = the_classifier.predict_proba(X_test)[:, pos_ind].reshape(-1, 1)

print("Test set scores (first 10 predictions):")
print(dataset_orig_test_pred.scores[:10])

from sklearn.metrics import accuracy_score, precision_score, recall_score

# Convert scores to binary predictions (threshold = 0.5)
y_test_pred = (dataset_orig_test_pred.scores > 0.5).astype(int)

# Compute main metrics
print(f"Test Accuracy: {accuracy_score(y_test, y_test_pred)}")
print(f"Test Precision: {precision_score(y_test, y_test_pred)}")
print(f"Test Recall: {recall_score(y_test, y_test_pred)}")




Test set scores (first 10 predictions):
[[0.05110014]
 [0.01196516]
 [0.05237648]
 [0.64457542]
 [0.61894245]
 [0.48253575]
 [0.01588735]
 [0.00462869]
 [0.00106118]
 [0.26590193]]
Test Accuracy: 0.8438974056603774
Test Precision: 0.7225392296718973
Test Recall: 0.6019013666072489


2. - Assess the group fairness of the classifier, assuming the protected attributes are Age, Sex. 
- apply a technique to ensure the classifier is fair.

In [None]:
random_seed = 42
random.seed(random_seed) 
np.random.seed(random_seed)

# A -------- Assess the group fairness of the classifier, assuming the protected attributes are Age, Sex ------------ 

privileged_groups = [{'sex': 1}]  # Men are priviledged
unprivileged_groups = [{'sex': 0}]  

test_predicted = dataset_orig_test.copy()
test_predicted.labels = y_test_pred  # Prédictions du classifier

print("\nInstance weights before reweighting:")
print(test_predicted.instance_weights[:10])  
# Compute fairness of the classifier (test set)
metric_test_classifier = BinaryLabelDatasetMetric(test_predicted, 
                                                  unprivileged_groups=unprivileged_groups,
                                                  privileged_groups=privileged_groups)

display(Markdown("#### Test set fairness metrics (classifier):"))
print("Statistical Parity Difference (classifier):", metric_test_classifier.statistical_parity_difference())

# B ---------------- apply a technique to ensure the classifier is fair  ----------------------------------------------

RW = Reweighing(unprivileged_groups=unprivileged_groups, 
                privileged_groups=privileged_groups)
dataset_train_transf = RW.fit_transform(dataset_orig_train)

print("\nInstance weights after reweighting:")
print(dataset_train_transf.instance_weights[:10])  

metric_transf_train = BinaryLabelDatasetMetric(dataset_train_transf, 
                                               unprivileged_groups=unprivileged_groups,
                                               privileged_groups=privileged_groups)
print("\n")
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups (Sex):")
print(metric_transf_train.statistical_parity_difference())

# C ------------------------- Train our 'fair classifier' on the new dataset ----------------------------------------------
# Normalise transformed data
X_train_rw = scale_orig.fit_transform(dataset_train_transf.features)  # Features corrigées
y_train_rw = dataset_train_transf.labels.ravel()  # Labels corrigés
w_train_rw = dataset_train_transf.instance_weights.ravel()  # Poids ajustés

# Train model again with transformed data
fair_classifier = LogisticRegression(random_state=random_seed)
fair_classifier.fit(X_train_rw, y_train_rw, sample_weight=w_train_rw)

# Predictions 
y_train_pred_fair = fair_classifier.predict(X_train_rw)

dataset_train_transf_pred = dataset_train_transf.copy()
dataset_train_transf_pred.labels = y_train_pred_fair

# D ------------------------------- Performances on test set (Fair classifier) --------------------------------------
X_test_rw = scale_orig.transform(dataset_orig_test.features)
y_test = dataset_orig_test.labels

# Predictions with the fair classifier
dataset_orig_test_pred_fair = dataset_orig_test.copy()
y_test_pred_fair = fair_classifier.predict(X_test_rw)

dataset_orig_test_pred_fair.scores = fair_classifier.predict_proba(X_test_rw)[:, pos_ind].reshape(-1, 1)

# Display scores for 10 first instances
print("\nTest set scores (first 10 predictions) for the fair classifier:")
print(dataset_orig_test_pred_fair.scores[:10])

y_test_pred_fair_bin = (dataset_orig_test_pred_fair.scores > 0.5).astype(int)

# Main metrics
print("Main classifier :\n")
print(f"Test Accuracy: {accuracy_score(y_test, y_test_pred)}")
print(f"Test Precision: {precision_score(y_test, y_test_pred)}")
print(f"Test Recall: {recall_score(y_test, y_test_pred)}")
print("\nFair classifier :\n")
print(f"Fair Classifier - Test Accuracy: {accuracy_score(y_test, y_test_pred_fair_bin)}")
print(f"Fair Classifier - Test Precision: {precision_score(y_test, y_test_pred_fair_bin)}")
print(f"Fair Classifier - Test Recall: {recall_score(y_test, y_test_pred_fair_bin)}")



Instance weights before reweighting:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


#### Test set fairness metrics (classifier):

Statistical Parity Difference (classifier): -0.17871986305566923

Instance weights after reweighting:
[0.8457202  1.09594426 0.78982139 1.09594426 1.09594426 0.78982139
 0.78982139 1.09594426 0.8457202  1.09594426]




#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups (Sex):
5.551115123125783e-17

Test set scores (first 10 predictions) for the fair classifier:
[[0.0344237 ]
 [0.00917653]
 [0.12897345]
 [0.56631903]
 [0.54080346]
 [0.41756688]
 [0.03856527]
 [0.00350017]
 [0.0005973 ]
 [0.20684067]]
Main classifier :

Test Accuracy: 0.8438974056603774
Test Precision: 0.7225392296718973
Test Recall: 0.6019013666072489

Fair classifier :

Fair Classifier - Test Accuracy: 0.8365271226415094
Fair Classifier - Test Precision: 0.7235202492211839
Fair Classifier - Test Recall: 0.5519904931669638
