In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [None]:
# Load the raw hiring dataset from the Excel workbook next to the notebook.
df = pd.read_excel(io="data.xlsx")

In [None]:
# Features exclude both the target and the protected attribute, so the model
# never sees gender directly; gender is kept aside for the fairness checks.
X = df.drop(['HiringDecision', 'Gender'], axis=1)
y = df.loc[:, 'HiringDecision']
gender = df.loc[:, 'Gender']

# Hold out 20% for evaluation. The split is stratified on gender (not on the
# target) so both groups keep their overall proportions in train and test.
(
    X_train, X_test,
    y_train, y_test,
    gender_train, gender_test,
) = train_test_split(
    X,
    y,
    gender,
    stratify=gender,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Baseline model, trained without any fairness intervention.
# The forest is seeded with the same value used for the train/test split so
# that Restart & Run All reproduces the reported metrics exactly; an unseeded
# forest yields different trees (and different scores) on every run.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predictions on the held-out set; reused by the fairness check that follows.
y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))


In [None]:
def demographic_parity(preds, genders, male_label='Male', female_label='Female'):
    """Compare the mean prediction of two gender groups.

    Parameters
    ----------
    preds : array-like
        One prediction per sample. With 0/1 predictions the group mean is the
        share of positive ("accept") decisions in that group.
    genders : array-like
        Group label of each sample, in the same order as ``preds``.
    male_label, female_label : optional
        Values in ``genders`` that identify the two groups. They default to
        ``'Male'`` and ``'Female'``.

    Returns
    -------
    tuple
        ``(male_accept_rate, female_accept_rate, absolute_difference)``.
        A group with no members yields ``nan`` for its rate (and therefore
        for the difference).

    Raises
    ------
    ValueError
        If ``preds`` and ``genders`` do not have the same length.
    """
    # Compare strictly by position: converting to plain arrays drops any
    # pandas index, so a Series whose index was not reset can no longer be
    # misaligned with (or rejected by) the boolean mask.
    preds = np.asarray(preds)
    genders = np.asarray(genders)
    if len(preds) != len(genders):
        raise ValueError(
            f"preds and genders must have the same length, "
            f"got {len(preds)} and {len(genders)}"
        )

    def _group_rate(label):
        # Mean prediction within one group; nan (without a NumPy warning)
        # when the group does not occur in the data.
        group_preds = preds[genders == label]
        return np.mean(group_preds) if group_preds.size else np.nan

    male_accept_rate = _group_rate(male_label)
    female_accept_rate = _group_rate(female_label)
    return male_accept_rate, female_accept_rate, abs(male_accept_rate - female_accept_rate)

# Fairness check for the baseline model on the held-out set. The gender
# labels get a fresh 0..n-1 index so they line up positionally with the
# prediction array returned by the classifier.
male_rate, female_rate, dp_diff = demographic_parity(y_pred, gender_test.reset_index(drop=True))
print("Demographic Parity:")
print(f"Male Accept Rate: {male_rate:.2f}")
print(f"Female Accept Rate: {female_rate:.2f}")
print(f"Difference: {dp_diff:.2f}")
print("X_test shape:", X_test.shape)
# The shape of shap_values is intentionally not printed here: shap_values is
# first assigned in the SHAP cell further down, so referencing it in this
# cell raised NameError on a fresh kernel (Restart & Run All).



In [None]:
# Explain the baseline forest's predictions on the held-out set with SHAP.
explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(X_test)

# Shape diagnostic, printed only once shap_values actually exists.
print("shap_values shape:", np.array(shap_values).shape)

# Depending on the installed shap release, a classifier yields either a list
# with one (n_samples, n_features) array per class or a single
# (n_samples, n_features, n_classes) array. Handle both layouts.
# NOTE(review): index 1 assumes a binary target whose positive ("hired")
# class sits at position 1 of clf.classes_ - confirm against the data.
if isinstance(shap_values, list):
    positive_class_shap = shap_values[1]
else:
    positive_class_shap = np.asarray(shap_values)
    if positive_class_shap.ndim == 3:
        positive_class_shap = positive_class_shap[:, :, 1]

# X_test is already a DataFrame carrying X's column names (it was split off
# the DataFrame X), so it is passed through as-is rather than re-wrapped and
# rebound, which would silently replace a variable other cells rely on.
shap.summary_plot(positive_class_shap, X_test)



In [None]:
# Reweighting step: every training row labelled 'Female' counts 1.5x in the
# loss, all other rows 1.0x. The 1.5 factor is a hand-picked setting, not
# derived from the data. Built with a vectorized comparison instead of a
# per-row Python lambda; the result is a float NumPy array in the same row
# order as gender_train.
sample_weights = np.where(gender_train == 'Female', 1.5, 1.0)


In [None]:
# Second forest, trained on the same data but with the per-row gender
# weights. It is seeded with the same value as the train/test split so that
# results are reproducible and a before/after comparison reflects the
# reweighting rather than run-to-run randomness of an unseeded forest.
clf_debiased = RandomForestClassifier(random_state=42)
clf_debiased.fit(X_train, y_train, sample_weight=sample_weights)

In [None]:
# Score the reweighted model on the same held-out rows as the baseline.
y_pred_debiased = clf_debiased.predict(X=X_test)

print("📋 Classification Report (After dilution):")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_debiased,
    )
)


In [None]:
# Repeat the demographic-parity check for the reweighted model. The gender
# labels are re-indexed 0..n-1 to match the prediction array by position.
male_rate, female_rate, dp_diff = demographic_parity(
    y_pred_debiased,
    gender_test.reset_index(drop=True),
)
# One print call, one line per argument: the output is the same four lines.
print(
    " Demographic Parity After dilution:",
    f"Male Accept Rate: {male_rate:.2f}",
    f"Female Accept Rate: {female_rate:.2f}",
    f"Difference: {dp_diff:.2f}",
    sep="\n",
)
