### This notebook demonstrates the capabilities of the DeterministicReranking algorithm.
The algorithm provides a way to construct balanced rankings of candidates based on precalculated scores.

In [18]:
import sys
from aif360.datasets import LawSchoolGPADataset
from aif360.datasets import RegressionDataset
# The reranking module is imported as a top-level module from a directory added to
# sys.path. The path is relative, so it is resolved against the current working
# directory -- NOTE(review): presumably the notebook must be run from its own folder; confirm.
sys.path.append("../aif360/algorithms/postprocessing/")
from deterministic_reranking import DeterministicReranking
# Same approach for the regression metric module.
sys.path.append("../aif360/metrics/")
from regression_metric import RegressionDatasetMetric

In [22]:
# Worked example: the Law School GPA dataset
dataset = LawSchoolGPADataset()
# Take the dataframe (first element of the conversion result) ...
df = dataset.convert_to_dataframe()[0]
# ... and order its rows by the 'zfygpa' score, highest first
df = df.sort_values(by=['zfygpa'], ascending=False)

In [23]:
# Wrap the score-sorted dataframe in a RegressionDataset: 'zfygpa' is the dependent
# variable, 'race' the protected attribute, and race == 1 the privileged class
ranked_dataset = RegressionDataset(
    df=df,
    dep_var_name='zfygpa',
    protected_attribute_names=['race'],
    privileged_classes=[[1]],
)

In [24]:
# Say only 15 students can be admitted, so we want the first 15 candidates of the ranking.
# Problem: every one of them has the same value of the sensitive attribute!
rank = ranked_dataset.convert_to_dataframe()[0].head(15)
rank

Unnamed: 0,lsat,ugpa,race,zfygpa
5030,0.716216,1.0,1.0,1.0
5924,0.675676,0.75,1.0,0.97096
14599,0.891892,1.0,1.0,0.960859
10311,0.783784,0.7,1.0,0.953283
9635,0.783784,0.75,1.0,0.953283
3924,0.837838,0.875,1.0,0.95202
6723,0.891892,0.95,1.0,0.948232
6399,0.72973,0.525,1.0,0.944444
3070,1.0,0.975,1.0,0.944444
9533,0.675676,1.0,1.0,0.930556


In [25]:
# InfeasibleIndex quantifies the imbalance: it reports how many positions of the ranking
# break the desired group proportions, together with those positions
m = RegressionDatasetMetric(
    dataset=ranked_dataset,
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
)
# k = 15 because we are analysing our list of 15 candidates.
# As expected, the purely score-based ranking violates the fairness requirement.
m.infeasible_index(target_prop={0: 0.5, 1: 0.5}, k=15)

(14, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

In [26]:
# A fair ranking algorithm can be used to address the imbalance
dr = DeterministicReranking(
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
)
# The algorithm comes in four variations: Greedy, Conservative, Relaxed and Constrained (default)
# NOTE(review): the original `dataset` is passed here rather than the score-sorted
# `ranked_dataset` -- confirm this is intended.
fair_rank = dr.fit_predict(
    dataset,
    rec_size=15,
    target_prop={0: 0.5, 1: 0.5},
    rerank_type='Constrained',
)
fair_rank.convert_to_dataframe()[0]

Unnamed: 0,lsat,ugpa,race,zfygpa
5030,0.74,1.0,1.0,1.0
5924,0.68,0.473684,1.0,0.97096
2403,0.0,0.842105,0.0,0.900253
14599,1.0,1.0,1.0,0.960859
3492,0.8,0.421053,0.0,0.888889
10311,0.84,0.368421,1.0,0.953283
4925,0.72,0.105263,0.0,0.888889
9635,0.84,0.473684,1.0,0.953283
8769,0.36,0.526316,0.0,0.842172
3924,0.92,0.736842,1.0,0.95202


In [27]:
# The infeasible index (II) of the reranked list is much better!
m_fair = RegressionDatasetMetric(
    dataset=fair_rank,
    unprivileged_groups=[{'race': 0}],
    privileged_groups=[{'race': 1}],
)
m_fair.infeasible_index(target_prop={0: 0.5, 1: 0.5}, k=15)

(7, [2, 4, 6, 8, 10, 12, 14])

In [28]:
# The normalized discounted cumulative gain (nDCG) quantifies how much prediction
# quality is lost by reranking
ndcg_score_based = m.discounted_cum_gain(normalized=True)
print(f'Normalized DCG of strictly score-based ranking: {ndcg_score_based}')
ndcg_fair = m_fair.discounted_cum_gain(normalized=True)
print(f'Normalized DCG of fair ranking: {ndcg_fair}')

Normalized DCG of strictly score-based ranking: 1.0
Normalized DCG of fair ranking: 0.964754376256152
