In [None]:
import joblib
from src.preprocessing import get_preprocessed_data

In [None]:
# Load the serialized model from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load(r"../models/black_box_xgboost.pkl")
# get_preprocessed_data() is unpacked into exactly five values; the second and
# third are discarded here.
# presumably pct_afroamericans holds, per row of df, the share of Afro-Americans
# in the person's area as a fraction in [0, 1] — TODO confirm in src.preprocessing.
df, _, _, true_labels, pct_afroamericans = get_preprocessed_data()

## Fairness

### Discussions

The only variable we have is `Pct_afro_american`.

What we want: a binary variable indicating whether or not the person is Afro-American.

Four possible ways to classify people as Afro-American:
- If they live in an area where over 50% of residents are Afro-American
- If they live in an area with a higher share of Afro-Americans than average
- If they are among the people living in the areas with the highest share of Afro-Americans (quantile-based)
- Randomly, with probabilities based on `Pct_afro_american`

In [None]:
from src.utils import quantile_binary, random_binary, over_pct_binary

In [None]:
# Build four alternative group indicators from the same percentage column.
is_afroamerican_50_pct = over_pct_binary(pct_afroamericans, 0.5)

# The average percentage is passed to both the "over average" rule and the
# quantile-based rule.
avg_pct_afroamerican = pct_afroamericans.mean()
is_afroamerican_avg = over_pct_binary(pct_afroamericans, avg_pct_afroamerican)
is_afroamerican_qb = quantile_binary(pct_afroamericans, avg_pct_afroamerican)

# NOTE(review): no random seed is set anywhere visible, so this indicator
# (and everything derived from it) may change between runs — confirm how
# random_binary draws its samples.
is_afroamerican_rb = random_binary(pct_afroamericans)

# Display labels, in the same order as the indicators summarised below.
names = ["Over 50%", "Over Average", "Quantile-based", "Random-based"]

# Mean of each indicator, in the same order as `names`.
means = [
    indicator.mean()
    for indicator in (
        is_afroamerican_50_pct,
        is_afroamerican_avg,
        is_afroamerican_qb,
        is_afroamerican_rb,
    )
]

### Plot

In [None]:
import matplotlib.pyplot as plt

In [None]:
# barh stacks bars from the bottom up, so reverse both sequences to get the
# first definition at the top of the chart.
labels_top_down = list(reversed(names))
proportions_top_down = list(reversed(means))

plt.barh(labels_top_down, proportions_top_down)
plt.title("Different definitions of Afro-American individuals")
plt.xlabel("Proportion of individuals classified as Afro-American")
plt.show()

## Fairness tests and plots

In [None]:
from src.fairness import fairness_test_statistic, fairness_partial_dependance_plots
import numpy as np

### Fairness tests

In [None]:
# Run the fairness test once per group definition, in the same order as `names`.
fairnesses = [
    fairness_test_statistic(df, model, group_indicator)
    for group_indicator in (
        is_afroamerican_50_pct,
        is_afroamerican_avg,
        is_afroamerican_qb,
        is_afroamerican_rb,
    )
]

# Keep the individual results available under their own names as well.
fairness_50_pct, fairness_avg, fairness_qb, fairness_rb = fairnesses

In [None]:
# Plot the fairness results (labelled as p-values on the axis) on a log10
# scale, one bar per group definition. Both sequences are reversed so the first
# definition ends up at the top of the chart.
#
# A value that is exactly 0.0 would give log10(0) = -inf plus a RuntimeWarning,
# and a non-finite bar width. Floor the values at the smallest positive normal
# float so every bar stays finite; values above that floor are unchanged.
smallest_positive = np.finfo(float).tiny
log_fairnesses = np.log10(np.clip(fairnesses[::-1], smallest_positive, None))

plt.barh(names[::-1], log_fairnesses)
plt.xlabel("Log of Fairness Test Statistic (p-value)")
plt.title("Fairness Test Statistics by Group")
plt.show()

### FPDPs

In [None]:
# Fairness partial dependence plots for the random-based group indicator.
# presumably the figures are written under file_dir — TODO confirm in
# src.fairness, and whether that directory must already exist.
fairness_partial_dependance_plots(
    df,
    model,
    is_afroamerican_rb,
    n_points=10,
    file_dir="../results/fairness/fpdps",
    threshold=0.05,
)