In [1]:
import pandas as pd
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Set up parameters
dataset_name = "demo"
y_label = "Policy_Status"
csv_file_path = f"{dataset_name}.csv"

# Load the CSV file as a DataFrame
df = pd.read_csv(csv_file_path)


In [3]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,Insurance_History,Property_Area,Policy_Status
0,Male,Yes,1,Graduate,No,4583,1508.0,1,Rural,0
1,Male,Yes,0,Graduate,Yes,3000,0.0,1,Urban,1
2,Male,Yes,0,Not Graduate,No,2583,2358.0,1,Urban,1
3,Male,No,0,Graduate,No,6000,0.0,1,Urban,1
4,Male,Yes,2,Graduate,Yes,5417,4196.0,1,Urban,1


In [4]:

# Encode Male as 1, Female as 0
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})

# Replace the categorical values with the numeric equivalents
categoricalFeatures = ['Property_Area', 'Married', 'Dependents', 'Education', 'Self_Employed']

# Iterate through the list of categorical features and one hot encode them.
for feature in categoricalFeatures:
    onehot = pd.get_dummies(df[feature], prefix=feature)
    df = df.drop(feature, axis=1)
    df = df.join(onehot)
    
X = df.drop(columns=[y_label])
y = df[y_label].values

In [5]:
scaler = StandardScaler()
data_std = scaler.fit_transform(X)
x_train,x_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state = 0)

In [6]:
# Liblinear is a solver that is very fast for small datasets, like ours
model = LogisticRegression(solver='liblinear', class_weight='balanced', max_iter=100)

# Fit the model to the training data
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

In [7]:
# Evaluate the model
print("Test performance: ", end="")
print(f"Accuracy; {accuracy_score(y_test, y_pred):.5f}", end=", ")
print(f"Precision: {precision_score(y_test, y_pred):.5f}", end=", ")
print(f"F1 Score: {f1_score(y_test, y_pred):.5f}", end=", ")
print(f"Recall: {recall_score(y_test, y_pred):.5f}")

Test performance: Accuracy; 0.69792, Precision: 0.73529, F1 Score: 0.77519, Recall: 0.81967


In [8]:
# Import necessary methods from the fairlearn library
from fairlearn.metrics import demographic_parity_difference

# Calculate the demographic parity difference
dpd = demographic_parity_difference(y_test, y_pred, sensitive_features=x_test['Gender'], method='between_groups')

# Print the demographic parity difference
print("Demographic Parity Difference: ", dpd)

Demographic Parity Difference:  0.28890543559195836


In [9]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# Define the fairness constraint (demographic parity)
constraint = DemographicParity()

# Define the mitigation method (Exponentiated Gradient)
mitigator = ExponentiatedGradient(model, constraint, max_iter=1000)

# Fit the mitigated model to the training data
mitigator.fit(x_train, y_train, sensitive_features = x_train["Gender"])

# Predict on the test set using the mitigated model
y_pred_mitigated = mitigator.predict(x_test)

# Evaluate the mitigated model
print("Test performance (mitigated): ", end="")
print(f"Accuracy: {accuracy_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"Precision: {precision_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"F1 Score: {f1_score(y_test, y_pred_mitigated):.5f}", end=", ")
print(f"Recall: {recall_score(y_test, y_pred_mitigated):.5f}")

# Calculate the demographic parity difference for the mitigated model
dpd_mitigated = demographic_parity_difference(y_test, y_pred_mitigated, sensitive_features = x_test[['Gender']], method='between_groups')

print("Demographic Parity Difference (mitigated): ", dpd_mitigated)

Test performance (mitigated): Accuracy: 0.71875, Precision: 0.72973, F1 Score: 0.80000, Recall: 0.88525
Demographic Parity Difference (mitigated):  0.1355174981384959
