In [None]:
from fairlearn.reductions import GridSearch
from fairlearn.reductions import DemographicParity

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression

from tempeh.configurations import datasets
# `datasets` maps a dataset name to a zero-argument callable; look up the
# law-school bar-passage entry first, then call it to build the dataset object.
lawschool_factory = datasets['lawschool_passbar']
dataset = lawschool_factory()

In [None]:
# Transpose and squeeze the training labels before fitting, and store the
# result back on the dataset so later cells see the same labels.
# NOTE(review): presumably y_train arrives as a single-column 2-D structure
# and this flattens it to 1-D -- confirm against the dataset loader.
flattened_labels = dataset.y_train.transpose().squeeze()
dataset.y_train = flattened_labels

# Baseline model: plain logistic regression with no fairness constraint.
unmitigated_predictor = LogisticRegression(fit_intercept=True, solver='liblinear')
unmitigated_predictor.fit(dataset.X_train, dataset.y_train)

In [None]:
import matplotlib.pyplot as plt
from fairlearn.metrics import group_zero_one_loss, group_mean_prediction

In [None]:
# Score the baseline model on the held-out split.
y_pred_unmitigated = unmitigated_predictor.predict(dataset.X_test)

# Zero-one loss grouped by race: print the overall value followed by the
# values for the 'white' and 'black' groups.
unmitigated_error = group_zero_one_loss(
    dataset.y_test,
    y_pred_unmitigated,
    dataset.race_test,
)
error_by_race = unmitigated_error.by_group
print(unmitigated_error.overall, error_by_race['white'], error_by_race['black'])

# Mean prediction grouped by race; its `.range` is what this notebook
# reports as the disparity of the baseline model.
unmitigated_mean_prediction = group_mean_prediction(
    dataset.y_test,
    y_pred_unmitigated,
    dataset.race_test,
)
print("disparity", unmitigated_mean_prediction.range)

In [None]:
# First sweep: a grid search sized by `n_sweep`, using a demographic-parity
# constraint with race as the sensitive feature.
n_sweep = 9
first_sweep_estimator = LogisticRegression(solver='liblinear', fit_intercept=True)
sweep = GridSearch(
    first_sweep_estimator,
    constraints=DemographicParity(),
    grid_size=n_sweep,
)

sweep.fit(
    dataset.X_train,
    dataset.y_train,
    sensitive_features=dataset.race_train,
)

# Show the multiplier vector of the result GridSearch selected as best.
best_first_sweep = sweep.best_result
print(best_first_sweep.lambda_vec)

In [None]:
# Collapse the best multiplier vector to one signed scalar: the '+' entry
# minus the '-' entry for the ('all', 'white') event/group pair.
lambda_vec_best = sweep.best_result.lambda_vec
positive_part = lambda_vec_best[("+", "all", "white")]
negative_part = lambda_vec_best[("-", "all", "white")]
lambda_best = positive_part - negative_part
print("lambda_best =", lambda_best)

In [None]:
# Second sweep: evenly spaced signed multipliers in a window of +/-0.5
# around the best value found by the first sweep.
n_second_sweep = 51
second_sweep_multipliers = np.linspace(
    lambda_best - 0.5,
    lambda_best + 0.5,
    n_second_sweep,
)

# Row index for every multiplier vector: sign x event x group.
iterables = [['+', '-'], ['all'], ['black', 'white']]
midx = pd.MultiIndex.from_product(iterables, names=['sign', 'event', 'group_id'])

# Only these two entries are ever non-zero; the rest of each vector stays 0.
positive_white = ("+", "all", "white")
negative_white = ("-", "all", "white")

second_sweep_lambdas = []
for multiplier in second_sweep_multipliers:
    lambda_vec = pd.Series(np.zeros(4), index=midx)
    # A negative multiplier is stored as its magnitude on the '-' entry;
    # anything else is stored unchanged on the '+' entry.
    is_negative = multiplier < 0
    target_entry = negative_white if is_negative else positive_white
    lambda_vec[target_entry] = abs(multiplier) if is_negative else multiplier
    second_sweep_lambdas.append(lambda_vec)

# One column per multiplier vector, in sweep order.
multiplier_df = pd.concat(second_sweep_lambdas, axis=1)

In [None]:
# Second sweep: same estimator settings and constraint as the first sweep,
# but with the explicit grid of multiplier vectors held in `multiplier_df`.
second_sweep_estimator = LogisticRegression(solver='liblinear', fit_intercept=True)
second_sweep = GridSearch(
    second_sweep_estimator,
    constraints=DemographicParity(),
    grid=multiplier_df,
)

second_sweep.fit(
    dataset.X_train,
    dataset.y_train,
    sensitive_features=dataset.race_train,
)

In [None]:
# Per-candidate test metrics for the second sweep, filled in by position.
sweep_error = np.zeros(n_second_sweep)
sweep_disparity = np.zeros(n_second_sweep)

for idx in range(n_second_sweep):
    candidate = second_sweep.all_results[idx].predictor
    candidate_preds = candidate.predict(dataset.X_test)

    # Error: the `.maximum` of the zero-one loss grouped by race.
    error_summary = group_zero_one_loss(dataset.y_test, candidate_preds, dataset.race_test)
    sweep_error[idx] = error_summary.maximum

    # Disparity: the `.range` of the mean prediction grouped by race.
    prediction_summary = group_mean_prediction(dataset.y_test, candidate_preds, dataset.race_test)
    sweep_disparity[idx] = prediction_summary.range

# One point per candidate: error on x, disparity on y.
plt.scatter(sweep_error, sweep_disparity)
plt.xlabel("Error")
plt.ylabel("Disparity")
plt.show()

In [None]:
# Coefficients of the predictor the second sweep selected as best.
best_predictor = second_sweep.best_result.predictor
print(best_predictor.coef_)

# Evaluate the sweep's own `predict` on the test split with the same two
# summaries used for the scatter plot: `.maximum` of the grouped zero-one
# loss and `.range` of the grouped mean prediction.
best_preds = second_sweep.predict(dataset.X_test)
best_error_summary = group_zero_one_loss(dataset.y_test, best_preds, dataset.race_test)
best_error = best_error_summary.maximum
best_prediction_summary = group_mean_prediction(dataset.y_test, best_preds, dataset.race_test)
best_disparity = best_prediction_summary.range
print(best_error, best_disparity)

In [None]:
# Plot the first two coefficients of every second-sweep predictor against
# each other, one point per predictor.
# Read each predictor's first coefficient row once, then split it into the
# two plotted components.
first_coef_rows = [x.predictor.coef_[0] for x in second_sweep.all_results]
c1 = [row[0] for row in first_coef_rows]
c2 = [row[1] for row in first_coef_rows]

plt.scatter(c1, c2)
# Label the axes and title the figure so it can be read on its own, in line
# with the labelled error/disparity scatter earlier in the notebook.
plt.xlabel("coef_[0][0]")
plt.ylabel("coef_[0][1]")
plt.title("Second sweep: predictor coefficients")
plt.show()