In [154]:
import pandas as pd
import numpy as np
import math

In [None]:
def output_rates(input_data, output_data, attribute_name, privileged=None, unprivileged=None, favourable=None, unfavourable=None):
    # make dataframes for both groups
    input_priv = input_data.loc[input_data[attribute_name] == privileged]
    output_priv = output_data.loc[output_data[attribute_name] == privileged]
    privileged_df = pd.concat([input_priv, output_priv], axis=1)
    
    input_unpriv = input_data.loc[input_data[attribute_name] == unprivileged]
    output_unpriv = output_data.loc[output_data[attribute_name] == unprivileged]
    unprivileged_df = pd.concat([input_unpriv, output_unpriv], axis=1)
    
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    
    for i in range(privileged_df.shape[0]):
        input_label = privileged_df.iloc[i,0]
        output_label = privileged_df.iloc[i,0]
        if input_label == output_label:
            if input_label == unfavourable:
                tn = tn + 1
            else:
                tp = tp + 1
        else:
            if input_label == favourable and output_label == unfavourable:
                fn = fn + 1
            else:
                fp = fp + 1
    
    rates_privileged = [tp,fp,tn,fn]
    
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    
    for i in range(unprivileged_df.shape[0]):
        input_label = unprivileged_df.iloc[i,0]
        output_label = unprivileged_df.iloc[i,0]
        if input_label == output_label:
            if input_label == unfavourable:
                tn = tn + 1
            else:
                tp = tp + 1
        else:
            if input_label == favourable and output_label == unfavourable:
                fn = fn + 1
            else:
                fp = fp + 1
                
    rates_unprivileged = [tp,fp,tn,fn]           
    
    rates_list = [rates_privileged, rates_unprivileged]
    
    return rates_list

In [None]:
# input privileged and unprivileged as integers
# input favourable and unfavourable as integers
# data as a pandas dataframe
# attribute_name as string

def statistical_parity_diff(data, attribute_name, privileged=None, unprivileged=None, favourable=None, unfavourable=None):
    protected_counts = data[attribute_name].value_counts().to_dict()
    favourable_outcomes_privileged_group = data.loc[data[attribute_name] == privileged]
    ratio_privileged = favourable_outcomes_privileged_group.iloc[:,-1].value_counts().to_dict()
    print(ratio_privileged)
    if favourable in ratio_privileged.keys():
        ratio_privileged = ratio_privileged[favourable]/favourable_outcomes_privileged_group.shape[0]
    else:
        ratio_privileged = 0
    print(ratio_privileged)
    favourable_outcomes_unprivileged_group = data.loc[data[attribute_name] == unprivileged]
    ratio_unprivileged = favourable_outcomes_unprivileged_group.iloc[:,-1].value_counts().to_dict()
    print(ratio_unprivileged)
    if favourable in ratio_unprivileged.keys():
        ratio_unprivileged = ratio_unprivileged[favourable]/favourable_outcomes_unprivileged_group.shape[0]
    else:
        ratio_unprivileged = 0
    print(ratio_unprivileged)

    fairness = ratio_privileged - ratio_unprivileged
    
    return fairness

In [156]:
# input privileged and unprivileged as integers
# input favourable and unfavourable as integers
# data as a pandas dataframe
# attribute_name as string

def disparate_impact(data, attribute_name, privileged=None, unprivileged=None, favourable=None, unfavourable=None):
    protected_counts = data[attribute_name].value_counts().to_dict()
    favourable_outcomes_privileged_group = data.loc[data[attribute_name] == privileged]
    
    favourable_outcomes_unprivileged_group = data.loc[data[attribute_name] == unprivileged]
    ratio_unprivileged = favourable_outcomes_unprivileged_group.iloc[:,-1].value_counts().to_dict()
    print(ratio_unprivileged)
    if favourable in ratio_unprivileged.keys():
        ratio_unprivileged = ratio_unprivileged[favourable]/favourable_outcomes_unprivileged_group.shape[0]
    else:
        ratio_unprivileged = 0
        return 0
    print("ratio_unpr: ",ratio_unprivileged)
    
    ratio_privileged = favourable_outcomes_privileged_group.iloc[:,-1].value_counts().to_dict()
    print(ratio_privileged)
    if favourable in ratio_privileged.keys():
        ratio_privileged = ratio_privileged[favourable]/favourable_outcomes_privileged_group.shape[0]
    else:
        ratio_privileged = 0
        return math.inf
    print("ratio_priv: ",ratio_privileged)
    
    fairness = ratio_unprivileged/ratio_privileged
    
    return fairness

In [None]:
def equal_opp_diff(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable):
    outcome_both = output_rates(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable)
    
    # [tp, fp, tn, fn]
    outcome_privileged = rates_both[0]
    outcome_unprivileged = rates_both[1]
    
    # true positive rate = tp / (tp + fn)
    tpr_privileged = outcome_privileged[0] / (outcome_privileged[0] + outcome_privileged[3])
    tpr_unprivileged = outcome_unprivileged[0] / (outcome_unprivileged[0] + outcome_unprivileged[3])

    fairness = tpr_unprivileged - tpr_privileged
    
    return fairness

In [None]:
def avg_odds_diff(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable):
    outcome_both = output_rates(input_data, output_data, attribute_name, privileged, unprivileged, favourable, unfavourable)
    
    # [tp, fp, tn, fn]
    outcome_privileged = rates_both[0]
    outcome_unprivileged = rates_both[1]
    
    # true positive rate = tp / (tp + fn)
    tpr_privileged = outcome_privileged[0] / (outcome_privileged[0] + outcome_privileged[3])
    tpr_unprivileged = outcome_unprivileged[0] / (outcome_unprivileged[0] + outcome_unprivileged[3])

    # false positive rate = fp / (fp + tn)
    fpr_privileged = outcome_privileged[1] / (outcome_privileged[1] + outcome_privileged[2])
    fpr_unprivileged = outcome_unprivileged[1] / (outcome_unprivileged[1] + outcome_unprivileged[2])
    
    fpr_diff = fpr_unprivileged - fpr_privileged
    tpr_diff = tpr_unprivileged - tpr_unprivileged
    
    fairness = (fpr_diff + tpr_diff) * 0.5
    
    return fairness

In [157]:
def random_data():
    return pd.DataFrame(np.random.randint(0,2,size=(10, 5)), columns=list('ABCPO'))

In [161]:
for i in range(10):
    D = random_data()
    fairness = disparate_impact(D,'P',1,0,1,0)
    if fairness > 100:
        print("true")
        print(fairness)

{1: 3, 0: 1}
ratio_unpr:  0.75
{0: 4, 1: 2}
ratio_priv:  0.3333333333333333
{0: 5, 1: 2}
ratio_unpr:  0.2857142857142857
{1: 2, 0: 1}
ratio_priv:  0.6666666666666666
{0: 3}
{1: 3}
ratio_unpr:  1.0
{0: 4, 1: 3}
ratio_priv:  0.42857142857142855
{1: 5, 0: 2}
ratio_unpr:  0.7142857142857143
{1: 2, 0: 1}
ratio_priv:  0.6666666666666666
{1: 3, 0: 2}
ratio_unpr:  0.6
{0: 3, 1: 2}
ratio_priv:  0.4
{1: 4, 0: 3}
ratio_unpr:  0.5714285714285714
{1: 2, 0: 1}
ratio_priv:  0.6666666666666666
{1: 3, 0: 2}
ratio_unpr:  0.6
{0: 3, 1: 2}
ratio_priv:  0.4
{0: 3, 1: 2}
ratio_unpr:  0.4
{0: 3, 1: 2}
ratio_priv:  0.4
{0: 4, 1: 3}
ratio_unpr:  0.42857142857142855
{1: 2, 0: 1}
ratio_priv:  0.6666666666666666


{1: 3, 0: 2}
0.6
{1: 4, 0: 1}
0.8
