# Developing a classification Audit trend

In the first iteration, this will only work on datasets that already include two or more binary classification variables.

We will need additional metadata: role options that mark a variable as either a prediction or the ground truth.


In [1]:
import numpy as np
import pandas as pd
import wiggum as wg


First, we will need a dataset that we can work with

In [2]:
# Seed NumPy's global RNG before anything random happens: the synthetic decision
# columns created later are drawn with np.random.choice, so without a fixed seed
# every Restart & Run All produces different data and different stored outputs.
np.random.seed(0)

# Directory holding the saved labeled dataset.
dataset = '../data/rateSPdataDeptRace/'
labeled_df = wg.LabeledDataFrame(dataset)

In [3]:
labeled_df.df.head()

Unnamed: 0,department,gender,decision,race
0,3,M,1,W
1,3,M,1,H
2,0,F,0,W
3,1,F,0,H
4,3,M,1,W


To test this, we need to create additional decision columns. To make it interesting, we will make them correlated with the true decision and with race, gender, or department.

We will start by modifying only the dataframe.  We will add the metadata and make a new labeled df after. 

In [4]:
# Work on a copy: the cells below add synthetic columns to test_df, and a plain
# alias would silently add them to labeled_df.df as well, leaving it out of sync
# with labeled_df.meta_df.
test_df = labeled_df.df.copy()

In [5]:
def add_decision_rand(row,p_flip):
    """
    Return a noisy version of a row's binary 'decision', independent of every
    other column. Meant to be used with ``df.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping or pandas.Series
        a row exposing a binary (0/1) value under the key 'decision'
    p_flip : float
        NOTE: despite the name, this is the probability that the original
        decision is KEPT; the value is flipped with probability ``1 - p_flip``.
        E.g. ``p_flip=.9`` yields a column that agrees with 'decision' about
        90% of the time.

    Returns
    -------
    the original decision or its opposite (0 <-> 1), drawn with np.random.choice
    """
    cur = row['decision']
    opp = int(not cur)
    # the first probability is attached to the unchanged value
    return np.random.choice([cur, opp],p=[p_flip, 1-p_flip])
    
    

In [6]:
# name of the synthetic column created in this cell
cur_new = 'decision_noise'


def decision_90(r):
    """Row-wise helper: noisy copy of 'decision' produced with p=.9."""
    return add_decision_rand(r, .9)


test_df[cur_new] = test_df.apply(decision_90, axis=1)

In [7]:
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise
0,3,M,1,W,1
1,3,M,1,H,0
2,0,F,0,W,0
3,1,F,0,H,0
4,3,M,1,W,1


In [8]:
test_df.groupby(['gender','department'])['decision'].mean().unstack()

department,0,1,2,3
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,0.228261,0.142857,0.35,0.29661
M,0.155172,0.037037,0.166667,0.280738


In [9]:
test_df.groupby(['gender','department'])[cur_new].mean().unstack()

department,0,1,2,3
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,0.26087,0.168067,0.425,0.398305
M,0.206897,0.111111,0.222222,0.336066


In [10]:
test_df.groupby('gender')['decision'].mean()

gender
F    0.246944
M    0.253807
Name: decision, dtype: float64

In [11]:
test_df.groupby('gender')[cur_new].mean()

gender
F    0.305623
M    0.309645
Name: decision_noise, dtype: float64

In [12]:
def add_acc(row,true_col,pred_col):
    """
    Label one row with its confusion-matrix outcome.

    Parameters
    ----------
    row : mapping or pandas.Series
        a row holding 0/1 values under true_col and pred_col
    true_col : string
        name of the ground truth column
    pred_col : string
        name of the prediction column

    Returns
    -------
    one of 'TN', 'FP', 'FN', 'TP'
    """
    # outer index: true value, inner index: predicted value
    outcomes_by_truth = (('TN', 'FP'), ('FN', 'TP'))
    actual = row[true_col]
    predicted = row[pred_col]
    return outcomes_by_truth[actual][predicted]

In [13]:
def add_acc_cur(row):
    """Outcome label of 'decision' against the column currently named by cur_new."""
    return add_acc(row, 'decision', cur_new)


test_df[cur_new + 'test'] = test_df.apply(add_acc_cur, axis=1)

In [14]:
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise,decision_noisetest
0,3,M,1,W,1,TP
1,3,M,1,H,0,FN
2,0,F,0,W,0,TN
3,1,F,0,H,0,TN
4,3,M,1,W,1,TP


In [15]:
confusion = test_df['decision_noisetest'].value_counts()
confusion

TN    670
TP    229
FP     79
FN     22
Name: decision_noisetest, dtype: int64

In [16]:
(confusion['TP'] + confusion['TN'])/sum(confusion)

0.899

In [17]:
# scratch check for detecting an absent label: 'FT' is not one of the outcome
# labels (TN/FP/FN/TP), so this branch runs
if 'FT' not in confusion.keys():
    print('yes')

yes


In [18]:
def compute_conf_stats(df,col):
    """
    Compute accuracy from a column of confusion-matrix outcome labels.

    Parameters
    ----------
    df : pandas.DataFrame
        data containing the outcome label column
    col : string
        name of a column whose values are 'TP', 'TN', 'FP' or 'FN'

    Returns
    -------
    (TP + TN) / total number of labeled rows, or NaN when there are no rows
    """
    confusion = df[col].value_counts()
    total = confusion.sum()
    if total == 0:
        # no rows: accuracy is undefined
        return float('nan')
    # a small subgroup may have no rows with a given outcome; value_counts then
    # has no entry for it, so treat a missing label as a count of zero
    n_correct = confusion.get('TP', 0) + confusion.get('TN', 0)
    return n_correct / total

In [19]:
compute_conf_stats(test_df,'decision_noisetest')

0.899

In [20]:
# accuracy within each gender subgroup (the group label itself is not printed)
for gender_value, gender_df in test_df.groupby('gender'):
    print(compute_conf_stats(gender_df, 'decision_noisetest'))

0.8924205378973105
0.9035532994923858


In [21]:
confusion = test_df.groupby('gender')['decision_noisetest'].value_counts()
confusion

gender  decision_noisetest
F       TN                    274
        TP                     91
        FP                     34
        FN                     10
M       TN                    396
        TP                    138
        FP                     45
        FN                     12
Name: decision_noisetest, dtype: int64

In [22]:
(confusion['F','TP'] + confusion['F','TN'])/sum(confusion['F'])

0.8924205378973105

In [23]:
(confusion['M','TP'] + confusion['M','TN'])/sum(confusion['M'])

0.9035532994923858

Now, we make the noise correlated with another variable (e.g., gender or race).

In [24]:
def add_decision_corr(row,corr_col,p_flips):
    """
    Return a noisy version of a row's binary 'decision' whose noise level
    depends on the row's value in another column. Meant to be used with
    ``df.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping or pandas.Series
        a row exposing a binary (0/1) 'decision' and a value under corr_col
    corr_col : string
        name of the column that selects the probability to use
    p_flips : dict
        maps each value of corr_col to a probability. NOTE: despite the name,
        this is the probability that the original decision is KEPT; the value
        is flipped with probability ``1 - p_flips[value]``.

    Returns
    -------
    the original decision or its opposite (0 <-> 1), drawn with np.random.choice

    Raises
    ------
    KeyError
        if the row's corr_col value has no entry in p_flips
    """
    cur = row['decision']
    opp = int(not cur)
    # probability of leaving the decision unchanged for this row's group
    p_cur = p_flips[row[corr_col]]
    return np.random.choice([cur, opp],p=[p_cur, 1-p_cur])
    
    

In [25]:
# name of the synthetic column created in this cell
cur_new = 'decision_gender'
# per-gender probability handed to add_decision_corr
p_gender_corr = {'F': .75, 'M': .9}


def decision_gender(r):
    """Row-wise helper: noisy copy of 'decision' with gender-dependent noise."""
    return add_decision_corr(r, 'gender', p_gender_corr)


test_df[cur_new] = test_df.apply(decision_gender, axis=1)

In [26]:
def add_acc_cur(row):
    """Outcome label of 'decision' against the column currently named by cur_new."""
    return add_acc(row, 'decision', cur_new)


test_df[cur_new + 'test'] = test_df.apply(add_acc_cur, axis=1)

In [27]:
confusion = test_df.groupby('gender')[cur_new +'test'].value_counts()
confusion

gender  decision_gendertest
F       TN                     229
        FP                      79
        TP                      79
        FN                      22
M       TN                     400
        TP                     140
        FP                      41
        FN                      10
Name: decision_gendertest, dtype: int64

In [28]:
(confusion['F','TP'] + confusion['F','TN'])/sum(confusion['F'])

0.7530562347188264

In [29]:
(confusion['M','TP'] + confusion['M','TN'])/sum(confusion['M'])

0.9137055837563451

In [30]:
test_df.groupby(['department','gender'])[cur_new +'test'].value_counts()


department  gender  decision_gendertest
0           F       TN                      54
                    FP                      17
                    TP                      14
                    FN                       7
            M       TN                      43
                    TP                       7
                    FP                       6
                    FN                       2
1           F       TN                      78
                    FP                      24
                    TP                      12
                    FN                       5
            M       TN                      23
                    FP                       3
                    TP                       1
2           F       TN                      36
                    TP                      24
                    FP                      16
                    FN                       4
            M       TN                      15
                    

In [31]:
# name of the synthetic column created in this cell
cur_new = 'decision_race'
# per-race probability handed to add_decision_corr
p_race_corr = {'B': .65, 'W': .9, 'H': .8}


def decision_race(r):
    """Row-wise helper: noisy copy of 'decision' with race-dependent noise."""
    return add_decision_corr(r, 'race', p_race_corr)


test_df[cur_new] = test_df.apply(decision_race, axis=1)
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise,decision_noisetest,decision_gender,decision_gendertest,decision_race
0,3,M,1,W,1,TP,1,TP,1
1,3,M,1,H,0,FN,0,FN,1
2,0,F,0,W,0,TN,0,TN,0
3,1,F,0,H,0,TN,0,TN,1
4,3,M,1,W,1,TP,1,TP,0


In [32]:
# Remove the helper outcome-label columns before building the new labeled dataframe.
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell re-runnable: a second run no longer fails on the already-dropped columns.
test_df = test_df.drop(columns=['decision_noisetest','decision_gendertest'],errors='ignore')

In [33]:
labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,categorical,"[groupby, trend]",,
gender,object,categorical,"[groupby, trend]",,
decision,int64,binary,[trend],,
race,object,categorical,"[groupby, trend]",,


In [34]:
# Wrap the augmented dataframe in a new LabeledDataFrame; its metadata is set below.
multi_decision_labeled_df = wg.LabeledDataFrame(test_df)


# Roles for every variable. Besides the 'groupby'/'trend' roles, 'decision' is
# tagged as the ground truth and each synthetic decision_* column as a prediction.
roles = {'department':['groupby', 'trend'],'gender':['groupby', 'trend'],'race':['groupby', 'trend'],
         'decision_noise':['trend','prediction'],'decision':['trend','groundtruth'],
         'decision_gender':['trend','prediction'],'decision_race':['trend','prediction']}
# Explicit variable types for these two columns only.
# NOTE(review): the remaining columns presumably keep whatever infer_var_types()
# assigns -- confirm against wiggum.
var_types = {'department':'categorical','gender':'categorical'}
# is_count = {c:False for c in test_df.columns}

# NOTE(review): set_var_types() is called after infer_var_types(), presumably so
# the explicit types above override the inferred ones -- confirm against wiggum.
multi_decision_labeled_df.infer_var_types()
multi_decision_labeled_df.set_roles(roles)
multi_decision_labeled_df.set_var_types(var_types)

In [35]:
multi_decision_labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,categorical,"[groupby, trend]",,
gender,object,categorical,"[groupby, trend]",,
decision,int64,binary,"[trend, groundtruth]",,
race,object,categorical,"[groupby, trend]",,
decision_noise,int64,binary,"[trend, prediction]",,
decision_gender,int64,binary,"[trend, prediction]",,
decision_race,int64,binary,"[trend, prediction]",,


In [36]:
multi_decision_labeled_df.to_csvs('../data/multi_decision_admisions/')

In [37]:
# One trend object per metric to audit.
# NOTE(review): going by the class names these are presumably accuracy, true
# positive rate and positive predictive value -- confirm against wiggum.
acc_trend = wg.Binary_Accuracy_Trend()
tpr_trend = wg.Binary_TPR_Trend()
ppv_trend = wg.Binary_PPV_Trend()
# Compute the subgroup trends for all three trend objects; as the last
# expression in the cell, the returned value is displayed.
multi_decision_labeled_df.get_subgroup_trends_1lev([acc_trend,tpr_trend,ppv_trend])

Unnamed: 0,feat1,feat2,subgroup_trend,subgroup,subgroup_trend_strength,group_feat,trend_type,agg_trend,agg_trend_strength,comparison_type
0,decision,decision_noise,0.933333,0,1,department,binary_acc,0.899000,1,aggregate-subgroup
1,decision,decision_noise,0.938356,1,1,department,binary_acc,0.899000,1,aggregate-subgroup
2,decision,decision_noise,0.846939,2,1,department,binary_acc,0.899000,1,aggregate-subgroup
3,decision,decision_noise,0.889439,3,1,department,binary_acc,0.899000,1,aggregate-subgroup
4,decision,decision_noise,0.892421,F,1,gender,binary_acc,0.899000,1,aggregate-subgroup
5,decision,decision_noise,0.903553,M,1,gender,binary_acc,0.899000,1,aggregate-subgroup
6,decision,decision_noise,0.890000,B,1,race,binary_acc,0.899000,1,aggregate-subgroup
7,decision,decision_noise,0.921739,H,1,race,binary_acc,0.899000,1,aggregate-subgroup
8,decision,decision_noise,0.896815,W,1,race,binary_acc,0.899000,1,aggregate-subgroup
9,decision,decision_gender,0.786667,0,1,department,binary_acc,0.848000,1,aggregate-subgroup


In [38]:
multi_decision_labeled_df.get_SP_rows(thresh=.2)

Unnamed: 0,feat1,feat2,subgroup_trend,subgroup,subgroup_trend_strength,group_feat,trend_type,agg_trend,agg_trend_strength,comparison_type,distance,SP_thresh0.2
24,decision,decision_race,0.69,B,1,race,binary_acc,0.867,1,aggregate-subgroup,0.204152,True
46,decision,decision_race,0.666667,1,1,department,binary_tpr,0.876494,1,aggregate-subgroup,0.239394,True
63,decision,decision_gender,0.477273,0,1,department,binary_ppv,0.646018,1,aggregate-subgroup,0.261208,True
64,decision,decision_gender,0.325,1,1,department,binary_ppv,0.646018,1,aggregate-subgroup,0.496918,True
67,decision,decision_gender,0.5,F,1,gender,binary_ppv,0.646018,1,aggregate-subgroup,0.226027,True
73,decision,decision_race,0.413793,1,1,department,binary_ppv,0.68323,1,aggregate-subgroup,0.394357,True
78,decision,decision_race,0.404762,B,1,race,binary_ppv,0.68323,1,aggregate-subgroup,0.407576,True


In [62]:
multi_decision_labeled_df.df.head()

variable,department,gender,decision,race,decision_noise,decision_gender,decision_race,decision_decision_noise_acc,decision_decision_gender_acc,decision_decision_race_acc
0,3,M,1,W,1,1,1,TP,TP,TP
1,3,M,1,H,1,1,1,TP,TP,TP
2,0,F,0,W,0,0,0,TN,TN,TN
3,1,F,0,H,0,1,0,TN,FP,TN
4,3,M,1,W,1,1,1,TP,TP,TP


In [63]:
var_list = list(multi_decision_labeled_df.get_vars_per_type('categorical'))
var_list


['department', 'gender', 'race']

In [78]:
# the combined column is named after the variables it merges
new_name = '_'.join(var_list)


def mergerow(row):
    """Join the row's values for every column in var_list into one '_'-separated label."""
    return '_'.join(map(str, row[var_list].values))


multi_decision_labeled_df.df[new_name] = multi_decision_labeled_df.df.apply(mergerow, axis=1)

In [80]:
multi_decision_labeled_df.df.head()

variable,department,gender,decision,race,decision_noise,decision_gender,decision_race,decision_decision_noise_acc,decision_decision_gender_acc,decision_decision_race_acc,department_gender_race
0,3,M,1,W,1,1,1,TP,TP,TP,3_M_W
1,3,M,1,H,1,1,1,TP,TP,TP,3_M_H
2,0,F,0,W,0,0,0,TN,TN,TN,0_F_W
3,1,F,0,H,0,1,0,TN,FP,TN,1_F_H
4,3,M,1,W,1,1,1,TP,TP,TP,3_M_W


In [86]:
multi_decision_labeled_df.result_df.head()

Unnamed: 0,feat1,feat2,subgroup_trend,subgroup,subgroup_trend_strength,group_feat,trend_type,agg_trend,agg_trend_strength,distance,SP_thresh0.2
0,decision,decision_noise,0.88,0,1,department,binary_acc,0.909,1,0.031903,False
1,decision,decision_noise,0.90411,1,1,department,binary_acc,0.909,1,0.00538,False
2,decision,decision_noise,0.918367,2,1,department,binary_acc,0.909,1,0.010305,False
3,decision,decision_noise,0.915842,3,1,department,binary_acc,0.909,1,0.007526,False
4,decision,decision_noise,0.914425,F,1,gender,binary_acc,0.909,1,0.005969,False


In [90]:
view_vars = ['feat1','feat2','group_feat']
score_col = 'subgroup_trend'
multi_decision_labeled_df.result_df.groupby(by=view_vars)[score_col].var()

feat1     feat2            group_feat
decision  decision_gender  department    0.032143
                           gender        0.018801
                           race          0.015756
          decision_noise   department    0.014637
                           gender        0.006785
                           race          0.005288
          decision_race    department    0.021279
                           gender        0.010738
                           race          0.039146
Name: subgroup_trend, dtype: float64

In [95]:
def pair_range(x):
    """Spread of a group of scores: absolute difference between its max and min."""
    return np.abs(x.max() - x.min())


multi_decision_labeled_df.result_df.groupby(by=view_vars)[score_col].apply(pair_range)

feat1     feat2            group_feat
decision  decision_gender  department    0.624156
                           gender        0.380000
                           race          0.345476
          decision_noise   department    0.377778
                           gender        0.199298
                           race          0.185666
          decision_race    department    0.519444
                           gender        0.271169
                           race          0.584847
Name: subgroup_trend, dtype: float64