In [28]:
#Import necessary libraries
!pip install aif360
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder





Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


we first load the census income dataset and encode the target variable 'income' into 'y' with binary values 0 and 1. We also encode the sensitive attribute 'gender' into 's' with binary values 0 and 1 for female and male, respectively. We then split the data into training and testing sets and fit a logistic regression model using the features 'age', 'fnlwgt', and 'education-num'. We evaluate the model's accuracy and fairness with respect to gender using the EO and DI metrics.

In [21]:
#Load income dataset

df = pd.read_csv('/content/adult.csv')

In [22]:

print(df.columns)

Index(['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race', 'gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
       'income'],
      dtype='object')


In [26]:
df.head(8)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K
5,34,Private,198693,10th,6,Never-married,Other-service,Not-in-family,White,Male,0,0,30,United-States,<=50K
6,29,?,227026,HS-grad,9,Never-married,?,Unmarried,Black,Male,0,0,40,United-States,<=50K
7,63,Self-emp-not-inc,104626,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,White,Male,3103,0,32,United-States,>50K


In [9]:
print("Shape before preprocessing:", df.shape)

Shape before preprocessing: (48842, 15)


In [27]:
# 2. Preprocess the dataset
# Encode categorical variables
#!pip install scikit-learn


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [30]:
onehotenc = OneHotEncoder()

In [32]:
#Extract feature columns (X)
X = df[['age', 'workclass', 'fnlwgt', 'education', 'educational-num',
       'marital-status', 'occupation', 'relationship', 'race','gender',
       'capital-gain', 'capital-loss', 'hours-per-week', 'native-country']]

In [33]:
#Extract target column y from the dataset
y = df['income']

In [34]:
#Split the dataset into training and testing sets (80% training, 20% testing) using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Remove the "gender" column from the training and testing sets to evaluate fairness 
X_train_no_gender = X_train.drop("gender",axis=1)
X_test_no_gender = X_test.drop("gender",axis=1)

In [35]:
#Encode the target variable (income) using LabelEncoder
le = LabelEncoder()
le.fit(y_train)
y_train_encodeur = le.transform(y_train)
y_test_encodeur = le.transform(y_test)

In [36]:
print(y_test_encodeur.shape)

(9769,)


In [43]:
# Fit the onehotencoder to the training dataset
onehotenc.fit(X.drop("gender",axis=1))

In [38]:
X_train_encodeur = onehotenc.transform(X_train_no_gender)

In [39]:
X_test_encodeur = onehotenc.transform(X_test_no_gender)

In [40]:
#Create a logistic regression model and fit it to the training data
model_utilisé = LogisticRegression(random_state=0,max_iter=1000)


In [41]:
model_utilisé.fit(X_train_encodeur,y_train_encodeur)

In [88]:
# 4. Make predictions and evaluate the model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix







# Print a classification report
print("\nClassification Report:\n", classification_report(y_test_encodeur, y_pred))

# Print a confusion matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test_encodeur, y_pred))



Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.95      0.91      7479
           1       0.77      0.59      0.66      2290

    accuracy                           0.86      9769
   macro avg       0.83      0.77      0.79      9769
weighted avg       0.86      0.86      0.85      9769


Confusion Matrix:
 [[7076  403]
 [ 949 1341]]


In [45]:
# Make predictions on the test set
y_pred = model_utilisé.predict(X_test_encodeur)

In [46]:
# Calculate accuracy, precision, recall, and F1 score
accuracy = accuracy_score(y_test_encodeur, y_pred)
precision = precision_score(y_test_encodeur, y_pred)
recall = recall_score(y_test_encodeur, y_pred)
f1 = f1_score(y_test_encodeur, y_pred)

In [47]:

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)

Accuracy: 0.8747057017094892
Precision: 0.7808219178082192
Recall: 0.6471615720524018
F1 score: 0.7077363896848137


In [48]:

# Print a classification report
print("\nClassification Report:\n", classification_report(y_test_encodeur, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.94      0.92      7479
           1       0.78      0.65      0.71      2290

    accuracy                           0.87      9769
   macro avg       0.84      0.80      0.81      9769
weighted avg       0.87      0.87      0.87      9769



In [49]:

# Print a confusion matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test_encodeur, y_pred))


Confusion Matrix:
 [[7063  416]
 [ 808 1482]]


In [56]:
# Make predictions on the test set
y_pred = model_utilisé.predict(X_test_encodeur)

In [60]:
# Add true labels to the test set
X_test['income'] = y_test


In [61]:
# Separate the test set by gender
X_test_male = X_test[X_test['gender'] == 'Male']
X_test_female = X_test[X_test['gender'] == 'Female']

In [62]:
# Calculate true positive rates (TPR) for each gender
TPR_male = recall_score(X_test_male['income'], X_test_male['predictions'], pos_label='>50K')
TPR_female = recall_score(X_test_female['income'], X_test_female['predictions'], pos_label='>50K')

In [63]:
# Calculate Equal Opportunity (EO) difference
EO_difference = abs(TPR_male - TPR_female)


In [64]:
# Calculate Disparate Impact (DI)
DI_male = sum(X_test_male['predictions'] == '>50K') / len(X_test_male)
DI_female = sum(X_test_female['predictions'] == '>50K') / len(X_test_female)
DI = min(DI_male / DI_female, DI_female / DI_male)

In [65]:
print("Equal Opportunity (EO) difference:", EO_difference)
print("Disparate Impact (DI):", DI)

Equal Opportunity (EO) difference: 0.036913766091078326
Disparate Impact (DI): 0.31016960171451174


The model does not treat the two groups equally in terms of positive predictions (as indicated by our low DI value)

# Evaluate fairness (EO/DI) using Random Forest Classifier

In [66]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score

In [70]:
#Split the dataset into training and testing sets (80% training, 20% testing) using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Remove the "gender" column from the training and testing sets to evaluate fairness 
X_train_no_gender = X_train.drop("gender",axis=1)
X_test_no_gender = X_test.drop("gender",axis=1)

In [71]:
#Encode the target variable (income) using LabelEncoder
le = LabelEncoder()
le.fit(y_train)
y_train_encodeur = le.transform(y_train)
y_test_encodeur = le.transform(y_test)

In [72]:
# Fit the onehotencoder to the training dataset
onehotenc.fit(X.drop("gender",axis=1))

In [73]:
X_train_encodeur = onehotenc.transform(X_train_no_gender)

In [74]:
X_test_encodeur = onehotenc.transform(X_test_no_gender)

In [75]:
# Train a Random Forest classifier
model = RandomForestClassifier(random_state=0)
model.fit(X_train_encodeur, y_train_encodeur)

In [89]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix


In [90]:
# Print a classification report
print("\nClassification Report:\n", classification_report(y_test_encodeur, y_pred))

# Print a confusion matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test_encodeur, y_pred))



Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.95      0.91      7479
           1       0.77      0.59      0.66      2290

    accuracy                           0.86      9769
   macro avg       0.83      0.77      0.79      9769
weighted avg       0.86      0.86      0.85      9769


Confusion Matrix:
 [[7076  403]
 [ 949 1341]]


In [76]:
#Make predictions on the test set
y_pred = model.predict(X_test_encodeur)

In [85]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Make predictions on the test set
y_pred = model.predict(X_test_encodeur)

# Calculate accuracy, precision, recall, and F1 score
accuracy = accuracy_score(y_test_encodeur, y_pred)
precision = precision_score(y_test_encodeur, y_pred, pos_label=le.transform(['>50K'])[0])
recall = recall_score(y_test_encodeur, y_pred, pos_label=le.transform(['>50K'])[0])
f1 = f1_score(y_test_encodeur, y_pred, pos_label=le.transform(['>50K'])[0])

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


Accuracy: 0.8616030299928344
Precision: 0.7689220183486238
Recall: 0.585589519650655
F1 Score: 0.6648487853247396


In [91]:
# Add predictions to the test set
X_test['predictions'] = le.inverse_transform(y_pred)

In [92]:
# Add true labels to the test set
X_test['income'] = y_test

In [93]:
# Separate the test set by gender
X_test_male = X_test[X_test['gender'] == 'Male']
X_test_female = X_test[X_test['gender'] == 'Female']

In [94]:
# Calculate true positive rates (TPR) for each gender
TPR_male = recall_score(X_test_male['income'], X_test_male['predictions'], pos_label='>50K')
TPR_female = recall_score(X_test_female['income'], X_test_female['predictions'], pos_label='>50K')


In [95]:
# Calculate Equal Opportunity (EO) difference
EO_difference = abs(TPR_male - TPR_female)

In [96]:
# Calculate Disparate Impact (DI)
DI_male = sum(X_test_male['predictions'] == '>50K') / len(X_test_male)
DI_female = sum(X_test_female['predictions'] == '>50K') / len(X_test_female)
DI = min(DI_male / DI_female, DI_female / DI_male)

In [97]:
print("Equal Opportunity (EO) difference:", EO_difference)
print("Disparate Impact (DI):", DI)

Equal Opportunity (EO) difference: 0.13353619332157912
Disparate Impact (DI): 0.25886856426747074
