# AI Hiring Bias Analysis (Reduction)

## Import

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Fairlearn imports
from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds, TruePositiveRateParity
from fairlearn.metrics import MetricFrame, selection_rate, true_positive_rate, false_positive_rate

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the cleaned recruitment dataset and print a summary of its columns,
# non-null counts and dtypes.
csv_path = "cleaned_fair_recrutment_dataset.csv"
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 119971 entries, 0 to 119970
Data columns (total 17 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   Candidate_ID          119971 non-null  int64  
 1   Gender                119971 non-null  str    
 2   Age                   119971 non-null  int64  
 3   Education_Level       119971 non-null  str    
 4   Experience_Years      119971 non-null  int64  
 5   Skill_Score           119971 non-null  int64  
 6   Aptitude_Test_Score   119971 non-null  int64  
 7   Technical_Test_Score  119971 non-null  int64  
 8   Communication_Score   119971 non-null  int64  
 9   Certifications_Count  119971 non-null  float64
 10  Previous_Companies    119971 non-null  int64  
 11  Interview_Score       119971 non-null  int64  
 12  Location              119971 non-null  str    
 13  Job_Role_Applied      119971 non-null  str    
 14  Expected_Salary       119971 non-null  int64  
 15  Hiring_Deci

## Modeling (Reduction)

We enforce Equal Opportunity (true positive rate parity across gender groups) using Fairlearn's reductions approach (`ExponentiatedGradient`).

In [3]:
# data processing - convert categorical to numeric
#
# Every categorical column listed below is replaced in `df` by an integer code.
# The cell is safe to re-run: a column that is already numeric is left alone,
# because mapping integers through the string-keyed dictionaries would turn
# every value into NaN.
CATEGORY_ENCODINGS = {
    # Age_Group: Ordinal encoding
    "Age_Group": {
        "18-25": 0,
        "26-35": 1,
        "36-45": 2,
        "46-55": 3,
        "55+": 4,
    },
    # Gender: Male=0, Female=1, Other=2
    "Gender": {"Male": 0, "Female": 1, "Other": 2},
    # Education_Level: Ordinal encoding (higher education = higher value)
    "Education_Level": {
        "High School": 0,
        "Diploma": 1,
        "Bachelors": 2,
        "Masters": 3,
        "PhD": 4,
    },
    # Location: Rural=0, Semi-Urban=1, Urban=2
    "Location": {"Rural": 0, "Semi-Urban": 1, "Urban": 2},
    # Job_Role_Applied: Label encoding
    "Job_Role_Applied": {
        "Data Analyst": 0,
        "HR Executive": 1,
        "Manager": 2,
        "ML Engineer": 3,
        "Software Engineer": 4,
    },
}

for column, mapping in CATEGORY_ENCODINGS.items():
    # Already encoded (e.g. the cell was run before): nothing to do.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue

    encoded = df[column].map(mapping)

    # Series.map yields NaN for labels missing from the dictionary. Fail loudly
    # instead of letting unexpected categories silently become missing values.
    unmapped = df.loc[encoded.isna() & df[column].notna(), column].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped values in column {column!r}: {list(unmapped)}")

    df[column] = encoded

df

Unnamed: 0,Candidate_ID,Gender,Age,Education_Level,Experience_Years,Skill_Score,Aptitude_Test_Score,Technical_Test_Score,Communication_Score,Certifications_Count,Previous_Companies,Interview_Score,Location,Job_Role_Applied,Expected_Salary,Hiring_Decision,Age_Group
0,1,0,50,2,19,48,75,70,65,2.0,5,92,2,1,22214,1,3
1,2,2,36,2,18,9,53,46,25,2.0,4,77,1,0,130094,0,2
2,3,1,58,3,11,1,85,62,72,1.0,3,90,2,3,78652,0,4
3,4,0,48,3,0,74,35,79,67,6.0,3,9,2,1,144618,0,3
4,5,0,37,2,0,64,99,25,38,7.0,5,24,2,0,133865,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119966,119996,0,51,2,12,36,68,55,98,2.0,6,82,2,4,115884,1,3
119967,119997,0,54,3,11,35,79,0,60,2.0,7,99,2,4,121326,0,3
119968,119998,0,30,2,8,18,56,60,22,6.0,0,1,0,2,63072,0,1
119969,119999,0,30,2,6,67,99,4,46,0.0,3,66,0,2,108627,1,1


In [4]:
# Build the model inputs: features (x), target (y) and sensitive attribute (a).
# The target, the candidate identifier and the gender/age columns are kept out
# of the feature matrix.
excluded_columns = ["Hiring_Decision", "Candidate_ID", "Gender", "Age", "Age_Group"]
x = df.drop(columns=excluded_columns)
y = df["Hiring_Decision"]
a = df["Gender"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
# and the three objects are split with the same row assignment.
x_train, x_test, y_train, y_test, a_train, a_test = train_test_split(
    x,
    y,
    a,
    test_size=0.2,
    random_state=1,
)


In [5]:
# Reduction with 1 Sensitive Feature: Gender
base_estimator = DecisionTreeClassifier(random_state=1)

# Fairlearn reduction - Equal Opportunity constraint
constraint = TruePositiveRateParity()  # Equal Opportunity = TPR parity

mitigator = ExponentiatedGradient(
    estimator=base_estimator,
    constraints=constraint
)

mitigator.fit(x_train, y_train, sensitive_features=a_train)

# ExponentiatedGradient is a randomized classifier: predict() draws each label
# at random from the learned mixture of base models. Without a seed, the accuracy
# and every fairness metric computed from y_pred change from run to run, so the
# seed is fixed here (same value as the estimator and the train/test split).
y_pred = mitigator.predict(x_test, random_state=1)
print("Overall Accuracy (Reduction): ", metrics.accuracy_score(y_test, y_pred))


Overall Accuracy (Reduction):  0.9602833923734111


In [6]:
# Fairness evaluation using MetricFrame
RULE = "=" * 60
FAIR_LIMIT = 0.05     # disparities below this are reported as fair / PASS
UNFAIR_LIMIT = 0.1    # disparities at or above this are reported as biased


def _print_banner(title, leading_blank=False):
    """Print a section title framed by two rule lines."""
    print(("\n" if leading_blank else "") + RULE)
    print(title)
    print(RULE)


def _verdict(passed):
    """Return the PASS/FAIL marker for a boolean check."""
    return "✓ PASS" if passed else "✗ FAIL"


# Metrics computed overall and per gender group. "Recall (TPR)" and "TPR" are
# both kept so the sklearn and fairlearn implementations appear side by side.
metric_fns = {
    "Accuracy": metrics.accuracy_score,
    "Precision": metrics.precision_score,
    "Recall (TPR)": metrics.recall_score,
    "Selection Rate": selection_rate,
    "TPR": true_positive_rate,
    "FPR": false_positive_rate,
}

metric_frame = MetricFrame(
    metrics=metric_fns,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=a_test,
)

# Display metrics by group
_print_banner("FAIRNESS METRICS BY GENDER (After Reduction)")
print(metric_frame.by_group.round(4).to_string())

# Display differences
_print_banner("DISPARITY (Difference between groups)", leading_blank=True)
diff = metric_frame.difference()
for name, gap in diff.items():
    if gap < FAIR_LIMIT:
        label = "✓ Fair"
    elif gap < UNFAIR_LIMIT:
        label = "⚠ Unfair"
    else:
        label = "✗ Biased"
    print(f"{name:20s}: {gap:.4f}  {label}")

# Fairness criteria
_print_banner("FAIRNESS CRITERIA EVALUATION", leading_blank=True)
tpr_diff = diff["TPR"]
fpr_diff = diff["FPR"]
ppv_diff = diff["Precision"]
sr_diff = diff["Selection Rate"]

equal_opportunity_ok = tpr_diff < FAIR_LIMIT
equalized_odds_ok = tpr_diff < FAIR_LIMIT and fpr_diff < FAIR_LIMIT
predictive_parity_ok = ppv_diff < FAIR_LIMIT
demographic_parity_ok = sr_diff < FAIR_LIMIT

print(f"Equal Opportunity (TPR parity):      {_verdict(equal_opportunity_ok)} (diff={tpr_diff:.4f})")
print(f"Equalized Odds (TPR + FPR parity):   {_verdict(equalized_odds_ok)} (TPR={tpr_diff:.4f}, FPR={fpr_diff:.4f})")
print(f"Predictive Parity (PPV parity):      {_verdict(predictive_parity_ok)} (diff={ppv_diff:.4f})")
print(f"Demographic Parity (Selection Rate): {_verdict(demographic_parity_ok)} (diff={sr_diff:.4f})")


FAIRNESS METRICS BY GENDER (After Reduction)
        Accuracy  Precision  Recall (TPR)  Selection Rate     TPR     FPR
Gender                                                                   
0         0.9613     0.9495        0.9029          0.2539  0.9029  0.0175
1         0.9584     0.8806        0.9561          0.2603  0.9561  0.0409
2         0.9615     0.9017        0.9399          0.2467  0.9399  0.0318

DISPARITY (Difference between groups)
Accuracy            : 0.0031  ✓ Fair
Precision           : 0.0689  ⚠ Unfair
Recall (TPR)        : 0.0532  ⚠ Unfair
Selection Rate      : 0.0136  ✓ Fair
TPR                 : 0.0532  ⚠ Unfair
FPR                 : 0.0234  ✓ Fair

FAIRNESS CRITERIA EVALUATION
Equal Opportunity (TPR parity):      ✗ FAIL (diff=0.0532)
Equalized Odds (TPR + FPR parity):   ✗ FAIL (TPR=0.0532, FPR=0.0234)
Predictive Parity (PPV parity):      ✗ FAIL (diff=0.0689)
Demographic Parity (Selection Rate): ✓ PASS (diff=0.0136)


**Conclusion**

After applying the Equal Opportunity (TPR parity) reduction for gender, the overall accuracy remained similar (~0.96), while the TPR difference ended up close to, but slightly above, our threshold of 0.05 (difference ≈ 0.0532), so the Equal Opportunity check still narrowly fails. Since the baseline model is trained on a synthetic, balanced dataset and is already very fair, the mitigation resulted in only minor changes and fluctuations in the group metrics. Overall, this is a reasonable result.