## 0. Imports

In [5]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import datetime

# Fairness
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference
from scipy import stats

# Set one global seaborn theme up front so every figure below is styled alike.
sns.set_theme(palette='muted', style='whitegrid')


## 1. Gender Disparate Impact

We define the Disparate Impact (DI) ratio as in the project description:

$$DI = \frac{\text{Approval rate (unprivileged group)}}{\text{Approval rate (privileged group)}}$$

A DI ratio below 0.8 indicates potential illegal disparate impact under US EEOC guidelines (the four-fifths rule).
We treat Male as the privileged group, since their historical approval rate is higher.

In [6]:
# Gender disparate-impact check (four-fifths rule).
#
# NOTE(review): `df` must already exist when this cell runs; it is not created
# in this cell. It needs a 'gender' column and a 'loan_approved' column.
# Assumes 'loan_approved' is coded 0/1 so that its sum is the number of
# approvals — TODO confirm against the data-loading step.
#
# Names defined here and reused by the plotting cell below:
# gender_stats, rate_male, DI, threshold.

# Keep only the two groups being compared; .copy() detaches the subset from df.
df_gendered = df[df['gender'].isin(['Male', 'Female'])].copy()

# Per-gender totals, approvals and approval rate. 'count' ignores missing
# outcomes, so the rate is computed over rows with a recorded decision.
gender_stats = (
    df_gendered
    .groupby('gender')['loan_approved']
    .agg(total='count', approved='sum')
    .assign(approval_rate=lambda g: g['approved'] / g['total'])
)

print(gender_stats.to_string(), '\n')

# Fail with a readable message instead of a bare KeyError from .loc below.
missing_groups = [g for g in ('Male', 'Female') if g not in gender_stats.index]
if missing_groups:
    raise ValueError(
        f'Cannot compute disparate impact: no rows for gender group(s) {missing_groups}.'
    )

rate_male   = gender_stats.loc['Male',   'approval_rate']
rate_female = gender_stats.loc['Female', 'approval_rate']

# The ratio is undefined when a rate is NaN (group has no recorded outcomes)
# or the privileged-group rate is 0. Without this guard DI would be NaN/inf,
# `DI < threshold` would be False, and the cell would wrongly report a pass.
if not np.isfinite(rate_male) or not np.isfinite(rate_female) or rate_male == 0:
    raise ValueError(
        'Cannot compute disparate impact: approval rates must be defined and the '
        f'Male approval rate must be non-zero (Male={rate_male}, Female={rate_female}).'
    )

# Unprivileged (Female) rate over privileged (Male) rate.
DI = rate_female / rate_male
print(f'Male approval rate  : {rate_male:.3f}  ({rate_male*100:.1f}%)')
print(f'Female approval rate: {rate_female:.3f}  ({rate_female*100:.1f}%)')
print(f'\nDisparate Impact ratio (Female / Male) = {DI:.4f}')

# Four-fifths rule cut-off.
threshold = 0.8
if DI < threshold:
    print(f'DI = {DI:.4f} < {threshold} → FAILS the four-fifths rule. '
          'Potential unlawful disparate impact on female applicants.')
else:
    print(f'DI = {DI:.4f} ≥ {threshold} → Passes the four-fifths rule.')

NameError: name 'df' is not defined

In [None]:
# Two-panel figure for the gender fairness check: approval rates against the
# four-fifths cut-off (left) and raw application counts (right).
# Uses gender_stats, rate_male, DI and threshold from the previous cell.
from pathlib import Path

fig, axes = plt.subplots(1, 2, figsize=(11, 4))

# Left: approval rate by gender
ax = axes[0]
ax.bar(gender_stats.index, gender_stats['approval_rate'] * 100)
# The rule is relative to the privileged group, so the cut-off line sits at
# `threshold` times the Male approval rate (in percent).
cutoff_pct = rate_male * 100 * threshold
ax.axhline(cutoff_pct, color='red', linestyle='--',
           label=f'Four-fifths threshold ({cutoff_pct:.1f}%)')
ax.set_ylabel('Approval Rate (%)')
ax.set_title('Loan Approval Rate by Gender')
ax.legend(fontsize=9)

# Right: count breakdown
ax2 = axes[1]
gender_stats[['approved', 'total']].plot(kind='bar', ax=ax2)
ax2.set_title('Application Counts by Gender')
ax2.set_ylabel('Count')
# Legend labels follow the column order selected above.
ax2.legend(['Approved', 'Total Applications'])

plt.suptitle(f'Gender Fairness  |  DI Ratio = {DI:.3f}  (threshold {threshold:.2f})',
             fontsize=13, fontweight='bold', y=1.02)
plt.tight_layout()

# Create the output folder first: savefig does not create missing directories.
fig_path = Path('../notebooks/figures/fig1_gender_disparate_impact.png')
fig_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(fig_path, dpi=150, bbox_inches='tight')
plt.show()