# Training the adverserial debiaser



In [1]:
# downloading the packages
!pip install aif360
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.datasets import StandardDataset
from aif360.algorithms.preprocessing import DisparateImpactRemover
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from sklearn.metrics import precision_score, recall_score, f1_score
from google.colab import drive
from aif360.algorithms.inprocessing import AdversarialDebiasing
from sklearn.preprocessing import StandardScaler
from aif360.algorithms.inprocessing import AdversarialDebiasing
import tensorflow.compat.v1 as tf
import joblib
import cloudpickle
import pickle
tf.disable_v2_behavior()

# mount collab to drive 
drive.mount("/content/drive")
%cd '/content/drive/My Drive/Proj 1'

# reading dataset
data = pd.read_csv('biased_admit_dataset.csv')
data=data[["x1", "x2",'p',"g","admitted"]] # changed
data

# converting to AIF360's dataset
dataset = StandardDataset(data,protected_attribute_names=['g'],
    privileged_classes=[['A']], categorical_features=["x1","x2"],
    features_to_keep=['x1','x2','p'],label_name="admitted", favorable_classes=[1]) # changed

# Get the dataset and split into train and test
dataset_orig_train, dataset_orig_test = dataset.split([0.7], shuffle=True)
# scale the binary features
scale_orig = StandardScaler()
dataset_orig_train.features = scale_orig.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = scale_orig.transform(dataset_orig_test.features)
# Metric for the original dataset
privileged_groups = [{'g': "A"}]
unprivileged_groups = [{'g':"D"}]

# definig the train and test
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)


# Create a TensorFlow session
sess = tf.Session()

# Train an adversarial debiasing model
adversarial_debiasing = AdversarialDebiasing(unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups,
                                             scope_name='adversary',
                                             sess=sess,
                                             adversary_loss_weight=0.4,
                                             batch_size=256
                                             )
adversarial_debiasing = adversarial_debiasing.fit(dataset_orig_train) # save this

# Transform training data and align features
dataset_transf_train = adversarial_debiasing.predict(dataset_orig_train)
dataset_transf_test = adversarial_debiasing.predict(dataset_orig_test)



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


pip install 'aif360[LawSchoolGPA]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
Instructions for updating:
non-resource variables are not supported in the long term


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Proj 1


  group_cond = np.logical_and(group_cond, X[:, index] == val)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


epoch 0; iter: 0; batch classifier loss: 0.649110; batch adversarial loss: 0.767680
epoch 1; iter: 0; batch classifier loss: 0.390120; batch adversarial loss: 0.809895
epoch 2; iter: 0; batch classifier loss: 0.405202; batch adversarial loss: 0.905052
epoch 3; iter: 0; batch classifier loss: 0.543127; batch adversarial loss: 0.973929
epoch 4; iter: 0; batch classifier loss: 0.708691; batch adversarial loss: 1.003316
epoch 5; iter: 0; batch classifier loss: 0.723572; batch adversarial loss: 0.953539
epoch 6; iter: 0; batch classifier loss: 0.765089; batch adversarial loss: 0.936528
epoch 7; iter: 0; batch classifier loss: 0.868509; batch adversarial loss: 0.945619
epoch 8; iter: 0; batch classifier loss: 0.916315; batch adversarial loss: 0.912159
epoch 9; iter: 0; batch classifier loss: 0.835036; batch adversarial loss: 0.903934
epoch 10; iter: 0; batch classifier loss: 0.676049; batch adversarial loss: 0.848512
epoch 11; iter: 0; batch classifier loss: 0.710928; batch adversarial loss:

# Converting back to dataframe

In [6]:
df_train, y=(dataset_transf_train.convert_to_dataframe())
df_test, y=(dataset_transf_test.convert_to_dataframe())

# Training the logistic regression

In [7]:

from sklearn.linear_model import LogisticRegression

# Fit a logistic regression model to the original training data
lr_orig = LogisticRegression()
lr_orig.fit(dataset_orig_train.features, dataset_orig_train.labels.ravel())

y_pred_orig = lr_orig.predict(dataset_orig_test.features)
score_function_values_original=lr_orig.decision_function(dataset_orig_test.features)


# Fit a logistic regression model to the transformed training data
lr_transf = LogisticRegression()
lr_transf.fit(dataset_transf_train.features, dataset_transf_train.labels.ravel()) # save this.
# Make predictions on the transformed test data
y_pred_transf = lr_transf.predict(dataset_transf_test.features)  
score_function_values_trans=lr_transf.decision_function(dataset_transf_test.features)

# save the logistic regression weights. 

with open('lr_transf.pkl', 'wb') as f:
    pickle.dump(lr_transf, f)

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute metrics for the original model
acc_orig = accuracy_score(dataset_orig_test.labels.ravel(), y_pred_orig)
prec_orig = precision_score(dataset_orig_test.labels.ravel(), y_pred_orig)
rec_orig = recall_score(dataset_orig_test.labels.ravel(), y_pred_orig)
f1_orig = f1_score(dataset_orig_test.labels.ravel(), y_pred_orig)

# Compute metrics for the transformed model
acc_transf = accuracy_score(dataset_transf_test.labels.ravel(), y_pred_transf)
prec_transf = precision_score(dataset_transf_test.labels.ravel(), y_pred_transf)
rec_transf = recall_score(dataset_transf_test.labels.ravel(), y_pred_transf)
f1_transf = f1_score(dataset_transf_test.labels.ravel(), y_pred_transf)

# Print the computed metrics
print("Original model:")
print("Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-score: {:.4f}".format(acc_orig, prec_orig, rec_orig, f1_orig))
print("\nTransformed model:")
print("Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1-score: {:.4f}".format(acc_transf, prec_transf, rec_transf, f1_transf))

Original model:
Accuracy: 0.9077, Precision: 0.9412, Recall: 0.7298, F1-score: 0.8221

Transformed model:
Accuracy: 0.9957, Precision: 1.0000, Recall: 0.9207, F1-score: 0.9587


# Calculating statistical parity

In [8]:
df_test["predicted"]=y_pred_transf
def calculate_statistical_parity(df):
  # calculate proportion of positive outcomes for sensitive group
  priviledge_group = df[df['g'] == 1]
  priviledge_prop = priviledge_group['predicted'].mean()

  # calculate proportion of positive outcomes for non-sensitive group
  non_priviledge_group = df[df['g'] == 0]
  non_priviledge_prop = non_priviledge_group['predicted'].mean()

  # calculate difference between the two proportions
  statistical_parity = priviledge_prop - non_priviledge_prop

  return('Statistical parity: {}'.format (statistical_parity))

print("After adverserial debiasing,", calculate_statistical_parity(df_test))

df_test_org, y=(dataset_orig_test.convert_to_dataframe())
df_test_org["predicted"]=list(y_pred_orig)
print("Before adverserial debiasing", calculate_statistical_parity(df_test_org))

After adverserial debiasing, Statistical parity: -0.09266666666666666
Before adverserial debiasing Statistical parity: 0.3506666666666667


# calculating equalized odds

In [16]:
from sklearn.metrics import confusion_matrix
def fnr_and_fpr(df):
  y_true,y_pred=df["admitted"],df["predicted"]
  # Assuming y_true and y_pred are arrays of true and predicted labels, respectively
  cm = confusion_matrix(y_true, y_pred)
  # Extract the TN, FP, FN, TP values from the confusion matrix
  tn = cm[0, 0]
  fp = cm[0, 1]
  fn = cm[1, 0]
  tp = cm[1, 1]
  tpr = tp / (tp + fn)
  fpr = fp / (fp + tn)
  return("TPR {}, FPR {}".format(tpr, fpr))

priviledge_orginal=df_test_org[df_test_org['g'] == 1]
non_priviledge_original=df_test_org[df_test_org['g'] == 0]
print("BEFORE: priviledge: {}, non-prvilege: {}".format(fnr_and_fpr(priviledge_orginal),fnr_and_fpr(non_priviledge_original)))

priviledge_trans=df_test[df_test['g'] == 1]
non_priviledge_trans=df_test[df_test['g'] == 0]
print("AFTER: priviledge: {}, non-prvilege: {}".format(fnr_and_fpr(non_priviledge_trans), fnr_and_fpr(priviledge_trans)))

BEFORE: priviledge: TPR 0.8141210374639769, FPR 0.04714640198511166, non-prvilege: TPR 0.4098360655737705, FPR 0.0015186028853454822
AFTER: priviledge: TPR 0.9294871794871795, FPR 0.0, non-prvilege: TPR 0.75, FPR 0.0


In [19]:
# getting the differences to be the equalized odds

## TPR 
### Before debiasing 
print("TPR diff Before debiasing",(0.8141210374639769-0.4098360655737705))
### After debiasing
print("TPR diff after debiasing",(0.9294871794871795-0.75))


## FPR
## Before debiasing
print("FPR diff before debiasing",(0.04714640198511166-0.0015186028853454822))
## After debiasing
print("FPR diff after debiasing",(0-0))


TPR diff Before debiasing 0.4042849718902064
TPR diff after debiasing 0.17948717948717952
FPR diff before debiasing 0.04562779909976618
FPR diff after debiasing 0
