In [1]:
from pathlib import Path
import pandas as pd
import utils
import matplotlib.pyplot as plt

In [2]:
def calc_prop(data, group_col, group, output_col, output_val):
    new = data[data[group_col] == group]
    return len(new[new[output_col] == output_val])/len(new)

In [3]:
def perf_measure(data, y_actual, y_hat, prv, unprv):
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    for i in range(len(y_hat)): 
        if y_actual[i]==y_hat[i]==prv:
           TP += 1
        if y_hat[i]==prv and y_actual[i]!=y_hat[i]:
           FP += 1
        if y_actual[i]==y_hat[i]==unprv:
           TN += 1
        if y_hat[i]==unprv and y_actual[i]!=y_hat[i]:
           FN += 1
    FN += (data.predicted_gender[data.gender == prv].isna()).sum()

    return TP, FP, TN, FN 

In [4]:
def Equality_of_odds(data, y_actual, y_hat, prv, unprv):
    TP, FP, TN, FN = perf_measure(data, y_actual, y_hat, prv, unprv)
    
    FNR = FN/(FN+TP)
    TPR = TP/(TP+FN)
    
    return FNR, TPR

In [6]:
def get_metrics(data, prv, unprv):
    nan_data = data[data.predicted_gender.isna()]
    prv_nones = (nan_data.gender == prv).sum()
    unprv_nones  = (nan_data.gender == unprv).sum()

    
    print(f'{prv} faces not detected: {prv_nones}')
    print(f'{unprv} faces not detected: {unprv_nones}')
    
    pr_priv = calc_prop(data, "gender", prv, "predicted_gender", prv)
    pr_unpriv = calc_prop(data, "gender", unprv, "predicted_gender", unprv)
    print(f'Disparate impact for unpriveliged {unprv} and priveliged {prv}: {float(pr_unpriv / pr_priv):.4}')
    print(f'Disparate impact for unpriveliged {prv} and priveliged {unprv}: {float(pr_priv / pr_unpriv):.4}')
    
    y_actual = data.gender.to_list()
    y_hat = data.predicted_gender.to_list()
    
    FNR, prv_TPR= Equality_of_odds(data, y_actual, y_hat, prv, unprv)
    print(f'Equality of odds:\n True positive rate: {prv_TPR:.4}, false positive rate: {FNR:.4} for {prv}')
    
    FNR, unprv_TPR = Equality_of_odds(data, y_actual, y_hat, unprv, prv)
    print(f' True positive rate: {unprv_TPR:.4}, false positive rate: {FNR:.4} for {unprv}')
    
    print(f'Equality of opportunity difference for {prv} and {unprv}: {float(unprv_TPR - prv_TPR):.4}')
    print(f'Equality of opportunity difference for {unprv} and {prv}: {float(prv_TPR - unprv_TPR):.4}')

    

In [7]:
data = utils.get_aws_val_df()

In [8]:
get_metrics(data, 'Male', 'Female')

Male faces not detected: 108
Female faces not detected: 48
Disparate impact for unpriveliged Female and priveliged Male: 1.152
Disparate impact for unpriveliged Male and priveliged Female: 0.8681
Equality of odds:
 True positive rate: 0.797, false positive rate: 0.203 for Male
 True positive rate: 0.9181, false positive rate: 0.08194 for Female
Equality of opportunity difference for Male and Female: 0.1211
Equality of opportunity difference for Female and Male: -0.1211


In [9]:
get_metrics(data.dropna(), 'Male', 'Female')

Male faces not detected: 0
Female faces not detected: 0
Disparate impact for unpriveliged Female and priveliged Male: 1.141
Disparate impact for unpriveliged Male and priveliged Female: 0.8764
Equality of odds:
 True positive rate: 0.8121, false positive rate: 0.1879 for Male
 True positive rate: 0.9267, false positive rate: 0.07333 for Female
Equality of opportunity difference for Male and Female: 0.1146
Equality of opportunity difference for Female and Male: -0.1146


In [10]:
data = utils.get_azured_val()

In [11]:
data.predicted_gender[data.predicted_gender == 'male'] = 'Male'
data.predicted_gender[data.predicted_gender == 'female'] = 'Female'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.predicted_gender[data.predicted_gender == 'male'] = 'Male'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.predicted_gender[data.predicted_gender == 'female'] = 'Female'


In [12]:
get_metrics(data, 'Male', 'Female')

Male faces not detected: 1165
Female faces not detected: 777
Disparate impact for unpriveliged Female and priveliged Male: 1.114
Disparate impact for unpriveliged Male and priveliged Female: 0.8978
Equality of odds:
 True positive rate: 0.726, false positive rate: 0.274 for Male
 True positive rate: 0.8086, false positive rate: 0.1914 for Female
Equality of opportunity difference for Male and Female: 0.0826
Equality of opportunity difference for Female and Male: -0.0826


In [13]:
get_metrics(data.dropna(), 'Male', 'Female')

Male faces not detected: 0
Female faces not detected: 0
Disparate impact for unpriveliged Female and priveliged Male: 1.047
Disparate impact for unpriveliged Male and priveliged Female: 0.9547
Equality of odds:
 True positive rate: 0.9088, false positive rate: 0.0912 for Male
 True positive rate: 0.9519, false positive rate: 0.04812 for Female
Equality of opportunity difference for Male and Female: 0.04309
Equality of opportunity difference for Female and Male: -0.04309
