In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from aif360.metrics import BinaryLabelDatasetMetric
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.sklearn.metrics import disparate_impact_ratio, statistical_parity_difference
# from aif360.sklearn.preprocessing import Reweighing
# Explainers
from aif360.explainers import MetricTextExplainer, MetricJSONExplainer

from data_engineering import run_data_engineering
from aif360.metrics import ClassificationMetric

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('./data/original/student-por.csv', sep=';')
df = run_data_engineering(df)
df.sample(10)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,G2,G3,age_group,age_group_binary,sex_binary,famsize_binary,health_group,health_group_binary,grade_group,grade_group_binary
567,MS,M,18,R,GT3,T,3,2,services,other,...,0,0,GT17,1,1,0,good,1,low,0
81,GP,M,15,U,GT3,T,2,3,other,other,...,9,9,LE17,0,1,0,good,1,low,0
131,GP,F,18,U,GT3,T,2,1,services,other,...,9,8,GT17,1,0,0,good,1,low,0
371,GP,F,18,U,GT3,T,2,2,other,other,...,10,10,GT17,1,0,0,good,1,low,0
129,GP,M,16,U,GT3,T,2,3,other,other,...,12,13,LE17,0,1,0,bad,0,high,1
124,GP,F,16,U,GT3,T,2,2,other,other,...,11,11,LE17,0,0,0,good,1,low,0
512,MS,F,18,R,GT3,T,1,1,at_home,other,...,7,7,GT17,1,0,0,good,1,low,0
174,GP,M,17,R,LE3,T,2,1,at_home,other,...,8,9,LE17,0,1,1,good,1,low,0
92,GP,F,16,U,LE3,T,3,1,other,other,...,13,12,LE17,0,0,1,bad,0,low,0
541,MS,F,17,R,GT3,T,2,2,other,other,...,9,11,LE17,0,0,0,good,1,low,0


## Step 3

### Part 1

Based on your dataset, identify the privileged/unprivileged groups associated with each of your protected
class variables (i.e. convert to a binary classification problem) -- done as part of data engineering

### Parts 2 - 4
For each protected class variable, select two fairness metrics and compute the fairness metrics associated with your privileged/unprivileged groups as a function of your two dependent variables.

Fairness metrics selected:
* Disparate Impact: Computed as the ratio of rate of favorable outcome for the unprivileged group to that of the privileged group. The ideal value of this metric is 1.0 A value < 1 implies higher benefit for the privileged group and a value >1 implies a higher benefit for the unprivileged group. Fairness for this metric is between 0.8 and 1.2

* Statistical Parity Difference: Computed as the difference of the rate of favorable outcomes received by the unprivileged group to the privileged group. The ideal value of this metric is 0. Fairness for this metric is between -0.1 and 0.1

In [3]:
# Functions needed for analysis
def _make_dataset(df, label_name):
    dataset = BinaryLabelDataset(
        df=df[['sex_binary', 'age_group_binary', 'famsize_binary', 'health_group_binary', 'grade_group_binary']],
        label_names=[label_name],
        protected_attribute_names=['sex_binary', 'age_group_binary', 'famsize_binary']
    )
    return dataset


def _make_groups(var):
    privileged_groups = [{var: 1}]
    unprivileged_groups = [{var: 0}]
    return privileged_groups, unprivileged_groups


def _transform_dataset(dataset, unprivileged_groups, privileged_groups):
    weighter = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
    weighter.fit(dataset)
    transformed_dataset = weighter.transform(dataset)
    return transformed_dataset
    
    
def _get_metrics(df, var, label_name, transform=False):
    dataset = _make_dataset(df, label_name)
    privileged_groups, unprivileged_groups = _make_groups(var)
    
    if transform:
        privileged_groups_all = [{'sex_binary': 1, 'age_group_binary': 1, 'famsize_binary': 1}]
        unprivileged_groups_all = [{'sex_binary': 0, 'age_group_binary': 0, 'famsize_binary': 0}]
        dataset = _transform_dataset(dataset, unprivileged_groups_all, privileged_groups_all)
        
    metric = BinaryLabelDatasetMetric(
        dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups
    )
    return metric

def _get_metric_df(df, var, label_name, transform=False):
    metrics = _get_metrics(df, var, label_name, transform)
    stat_par_diff = metrics.statistical_parity_difference()
    disp_impact = metrics.disparate_impact()
    return pd.DataFrame({
        'variable': [var],
        'label': [label_name],
        'stat_par_diff': [stat_par_diff],
        'disp_impact': [disp_impact]
    })

def generate_metrics(df, transform=False):
    results_dfs = []
    for var in ['sex_binary', 'age_group_binary', 'famsize_binary']:
        for label in ['health_group_binary', 'grade_group_binary']:
            results_dfs.append(_get_metric_df(df, var, label, transform))
    return pd.concat(results_dfs)



In [4]:
generate_metrics(df)

Unnamed: 0,variable,label,stat_par_diff,disp_impact
0,sex_binary,health_group_binary,-0.112625,0.817328
0,sex_binary,grade_group_binary,0.128183,1.36663
0,age_group_binary,health_group_binary,0.042629,1.082083
0,age_group_binary,grade_group_binary,0.045769,1.116679
0,famsize_binary,health_group_binary,0.004547,1.008315
0,famsize_binary,grade_group_binary,0.034407,1.085794


In [5]:
generate_metrics(df, transform=True)

Unnamed: 0,variable,label,stat_par_diff,disp_impact
0,sex_binary,health_group_binary,-0.109553,0.821661
0,sex_binary,grade_group_binary,0.06254,1.175284
0,age_group_binary,health_group_binary,0.046607,1.090317
0,age_group_binary,grade_group_binary,-0.012621,0.968666
0,famsize_binary,health_group_binary,0.008352,1.01536
0,famsize_binary,grade_group_binary,-0.024531,0.940309


In [6]:
privileged_groups_all = [{'sex_binary': 1, 'age_group_binary': 1, 'famsize_binary': 1}]
unprivileged_groups_all = [{'sex_binary': 0, 'age_group_binary': 0, 'famsize_binary': 0}]
original_dataset = _make_dataset(df, 'grade_group_binary')
transformed_dataset = _transform_dataset(original_dataset, unprivileged_groups_all, privileged_groups_all)

In [7]:
dataset_orig_train, dataset_orig_test = original_dataset.split([0.7], shuffle=True)
dataset_transformed_train, dataset_transformed_test = transformed_dataset.split([0.7], shuffle=True)

In [8]:
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {'logisticregression__sample_weight': dataset_orig_train.instance_weights}

lr_orig_panel19 = model.fit(dataset_orig_train.features, dataset_orig_train.labels.ravel(), **fit_params)

In [9]:
sens_ind = 0
sens_attr = dataset_orig_train.protected_attribute_names[sens_ind]

unprivileged_groups = [{sens_attr: v} for v in
                       dataset_orig_train.unprivileged_protected_attributes[sens_ind]]
privileged_groups = [{sens_attr: v} for v in
                     dataset_orig_train.privileged_protected_attributes[sens_ind]]

In [10]:
def generate_metrics_dataset(dataset):
    results_dfs = []
    for var in ['sex_binary', 'age_group_binary', 'famsize_binary']:
        for label in ['grade_group_binary']:
            results_dfs.append(_get_metric_dataset(dataset, var, label))
    return pd.concat(results_dfs)
    
def _make_groups(var):
    privileged_groups = [{var: 1}]
    unprivileged_groups = [{var: 0}]
    return privileged_groups, unprivileged_groups


def _get_metric_dataset(dataset, var, label_name):
    metrics = _get_metric_details(dataset, var, label_name)
    stat_par_diff = metrics.statistical_parity_difference()
    disp_impact = metrics.disparate_impact()
    return pd.DataFrame({
        'variable': [var],
        'label': [label_name],
        'stat_par_diff': [stat_par_diff],
        'disp_impact': [disp_impact]
    })

def _get_metric_details(dataset, var, label_name):

    privileged_groups, unprivileged_groups = _make_groups(var)
    
    metric = BinaryLabelDatasetMetric(
        dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups
    )
    return metric

In [11]:
# generate_metrics_dataset(dataset_orig_train)

In [12]:
# generate_metrics_dataset(dataset_transformed_train)

In [13]:
from collections import defaultdict

def test(dataset, model, thresh_arr):
    try:
        # sklearn classifier
        y_val_pred_prob = model.predict_proba(dataset.features)
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 inprocessing algorithm
        y_val_pred_prob = model.predict(dataset).scores
        pos_ind = 0
    
    metric_arrs = defaultdict(list)
    for thresh in thresh_arr:
        y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        metric = ClassificationMetric(
                dataset, dataset_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)

        metric_arrs['bal_acc'].append((metric.true_positive_rate()
                                     + metric.true_negative_rate()) / 2)
        metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())
        metric_arrs['disp_imp'].append(metric.disparate_impact())
        metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())
        metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())
        metric_arrs['theil_ind'].append(metric.theil_index())
    
    return metric_arrs

In [14]:
def describe_metrics(metrics, thresh_arr):
    best_ind = np.argmax(metrics['bal_acc'])
    print("Threshold corresponding to Best balanced accuracy: {:6.4f}".format(thresh_arr[best_ind]))
    print("Best balanced accuracy: {:6.4f}".format(metrics['bal_acc'][best_ind]))
#     print("Disparity Impact: {:6.4f}".format(metrics['disp_imp'][best_ind]))
    disp_imp_at_best_ind = 1 - min(metrics['disp_imp'][best_ind], 1/metrics['disp_imp'][best_ind])
    print("Corresponding 1-min(DI, 1/DI) value: {:6.4f}".format(disp_imp_at_best_ind))
    print("Corresponding average odds difference value: {:6.4f}".format(metrics['avg_odds_diff'][best_ind]))
    print("Corresponding statistical parity difference value: {:6.4f}".format(metrics['stat_par_diff'][best_ind]))
    print("Corresponding equal opportunity difference value: {:6.4f}".format(metrics['eq_opp_diff'][best_ind]))
    print("Corresponding Theil index value: {:6.4f}".format(metrics['theil_ind'][best_ind]))

In [15]:
thresh_arr = np.linspace(0.01, 0.5, 50)
val_metrics = test(dataset=dataset_orig_train,
                   model=lr_orig_panel19,
                   thresh_arr=thresh_arr)
lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])

In [16]:
# describe_metrics(val_metrics, thresh_arr)

In [17]:
lr_orig_metrics = test(dataset=dataset_orig_test,
                       model=lr_orig_panel19,
                       thresh_arr=[thresh_arr[lr_orig_best_ind]])
describe_metrics(lr_orig_metrics, [thresh_arr[lr_orig_best_ind]])


Threshold corresponding to Best balanced accuracy: 0.4200
Best balanced accuracy: 0.5791
Corresponding 1-min(DI, 1/DI) value: 1.0000
Corresponding average odds difference value: 0.9127
Corresponding statistical parity difference value: 0.9106
Corresponding equal opportunity difference value: 0.9661
Corresponding Theil index value: 0.2198


In [18]:
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))

fit_params = {'logisticregression__sample_weight': dataset_transformed_train.instance_weights}

lr_transformed_panel19 = model.fit(dataset_transformed_train.features, dataset_transformed_train.labels.ravel(), **fit_params)


In [19]:
sens_ind = 0
sens_attr = dataset_transformed_train.protected_attribute_names[sens_ind]

unprivileged_groups = [{sens_attr: v} for v in
                       dataset_transformed_train.unprivileged_protected_attributes[sens_ind]]
privileged_groups = [{sens_attr: v} for v in
                     dataset_transformed_train.privileged_protected_attributes[sens_ind]]

In [20]:
thresh_arr = np.linspace(0.01, 0.5, 50)
val_metrics = test(dataset=dataset_transformed_train,
                   model=lr_transformed_panel19,
                   thresh_arr=thresh_arr)
lr_transformed_best_ind = np.argmax(val_metrics['bal_acc'])

In [21]:
lr_transformed_metrics = test(dataset=dataset_transformed_test,
                       model=lr_transformed_panel19,
                       thresh_arr=[thresh_arr[lr_transformed_best_ind]])
describe_metrics(lr_transformed_metrics, [thresh_arr[lr_transformed_best_ind]])

Threshold corresponding to Best balanced accuracy: 0.3600
Best balanced accuracy: 0.4528
Corresponding 1-min(DI, 1/DI) value: 0.2972
Corresponding average odds difference value: 0.1957
Corresponding statistical parity difference value: 0.1764
Corresponding equal opportunity difference value: 0.2260
Corresponding Theil index value: 0.3490
