# Developing a classification Audit trend

In the first iteration, this will only work on datasets that already include two or more binary classification variables.

We will need additional metadata: new role options that mark a variable as a prediction or as the ground truth.


In [1]:
import numpy as np
import pandas as pd
import wiggum as wg


First, we will need a dataset that we can work with

In [2]:
# Relative path handed to wiggum's LabeledDataFrame constructor.
# NOTE(review): presumably a directory holding the data and its metadata — confirm against wiggum.
dataset = '../data/rateSPdataDeptRace/'
labeled_df = wg.LabeledDataFrame(dataset)

In [3]:
# Peek at the first rows of the raw DataFrame wrapped by the labeled frame.
labeled_df.df.head()

Unnamed: 0,department,gender,decision,race
0,3,M,1,W
1,3,M,1,H
2,0,F,0,W
3,1,F,0,H
4,3,M,1,W


To test, we need to create additional decision columns. To make it interesting, we will make them correlated with the true decision and with race, gender, or department.  

We will start by modifying only the dataframe.  We will add the metadata and make a new labeled df after. 

In [4]:
# Work on an independent copy: a plain assignment would only alias
# labeled_df.df, so every column added to test_df below would also show up on
# the originally loaded frame.
test_df = labeled_df.df.copy()

In [5]:
def add_decision_rand(row,p_flip):
    """
    Row-wise helper for ``df.apply(..., axis=1)``: draw a noisy copy of 'decision'.

    Despite its name, ``p_flip`` is the probability that the returned value
    EQUALS the row's current 'decision'; the opposite binary value is returned
    with probability ``1 - p_flip``. Every call is a separate draw from numpy's
    global random state and does not look at any other column.

    Parameters
    ----------
    row : mapping or pandas Series
        Must provide a 'decision' entry; its truthiness selects the opposite value.
    p_flip : float
        Probability (between 0 and 1) of keeping the current decision.

    Returns
    -------
    The element picked by ``np.random.choice`` from [decision, opposite].
    """
    original = row['decision']
    # opposite label: 0 for a truthy decision, 1 for a falsy one
    flipped = 0 if original else 1
    outcomes = [original, flipped]
    weights = [p_flip, 1 - p_flip]
    return np.random.choice(outcomes, p=weights)
    
    

In [6]:
# Noisy predictor: each row keeps its true decision with probability 0.9
# (the second argument of add_decision_rand is the keep probability).
def decision_90(r):
    return add_decision_rand(r, .9)

cur_new = 'decision_noise'
test_df[cur_new] = test_df.apply(decision_90, axis=1)

In [7]:
# Check that the decision_noise column was appended to the frame.
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise
0,3,M,1,W,1
1,3,M,1,H,1
2,0,F,0,W,0
3,1,F,0,H,0
4,3,M,1,W,1


In [8]:
# Mean of the 0/1 true decision per gender (rows) and department (columns),
# i.e. the share of rows with decision == 1 in each cell.
test_df.groupby(['gender','department'])['decision'].mean().unstack()

department,0,1,2,3
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,0.228261,0.142857,0.35,0.29661
M,0.155172,0.037037,0.166667,0.280738


In [9]:
# Same gender-by-department table for the synthetic column currently named by
# cur_new, for comparison with the true decision rates.
test_df.groupby(['gender','department'])[cur_new].mean().unstack()

department,0,1,2,3
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
F,0.26087,0.218487,0.375,0.322034
M,0.275862,0.148148,0.277778,0.338115


In [10]:
# Share of rows with decision == 1 within each gender, ignoring department.
test_df.groupby('gender')['decision'].mean()

gender
F    0.246944
M    0.253807
Name: decision, dtype: float64

In [11]:
# Share of 1s per gender in the synthetic column currently named by cur_new.
test_df.groupby('gender')[cur_new].mean()

gender
F    0.288509
M    0.321489
Name: decision_noise, dtype: float64

In [12]:
def add_acc(row,true_col,pred_col):
    """
    Row-wise helper for ``df.apply(..., axis=1)``: name the confusion-matrix cell of a row.

    Parameters
    ----------
    row : mapping or pandas Series
        Holds the two 0/1 values looked up below.
    true_col : str
        Key of the ground-truth value in ``row``.
    pred_col : str
        Key of the predicted value in ``row``.

    Returns
    -------
    str
        'TN', 'FP', 'FN' or 'TP' (truth 0/pred 0, truth 0/pred 1,
        truth 1/pred 0, truth 1/pred 1 respectively).
    """
    when_truth_is_0 = ['TN', 'FP']
    when_truth_is_1 = ['FN', 'TP']
    # first pick the row of labels by the ground truth, then the label by the prediction
    labels_for_truth = [when_truth_is_0, when_truth_is_1][row[true_col]]
    return labels_for_truth[row[pred_col]]

In [13]:
# Label every row with its confusion-matrix cell, comparing the true 'decision'
# with the column named by cur_new. cur_new is looked up when the function is
# called, not when it is defined.
def add_acc_cur(row):
    return add_acc(row, 'decision', cur_new)

test_df[cur_new+'test'] = test_df.apply(add_acc_cur, axis=1)

In [14]:
# Check the newly added confusion-label column (cur_new + 'test').
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise,decision_noisetest
0,3,M,1,W,1,TP
1,3,M,1,H,1,TP
2,0,F,0,W,0,TN
3,1,F,0,H,0,TN
4,3,M,1,W,1,TP


In [15]:
# Count how many rows fall into each confusion-matrix cell (TN/TP/FP/FN).
confusion = test_df['decision_noisetest'].value_counts()
confusion

TN    675
TP    234
FP     74
FN     17
Name: decision_noisetest, dtype: int64

In [16]:
def compute_conf_stats(df,col):
    """
    Accuracy implied by a column of confusion-matrix labels: (TP + TN) / all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame (or group of a frame) holding the label column.
    col : str
        Name of the column whose values are labels such as 'TP', 'TN', 'FP', 'FN'.

    Returns
    -------
    float
        Share of counted rows labelled 'TP' or 'TN'; NaN when there are no
        rows to count.
    """
    confusion = df[col].value_counts()
    total = confusion.sum()
    if total == 0:
        # nothing to score (empty frame or only missing labels)
        return float('nan')
    # value_counts leaves out labels that never occur, so a subgroup without any
    # TP (or TN) rows must contribute 0 instead of raising KeyError
    correct = confusion.get('TP', 0) + confusion.get('TN', 0)
    return correct / total

In [17]:
# Accuracy by hand: correct predictions (TP + TN) divided by the total row count.
(confusion['TP'] + confusion['TN'])/sum(confusion)

0.909

In [18]:
# The helper should reproduce the hand-computed overall accuracy.
compute_conf_stats(test_df,'decision_noisetest')

0.909

In [19]:
# Accuracy within each gender group; one value is printed per group in the
# order groupby yields them (the group label l itself is not printed).
for l,gdf in test_df.groupby('gender'):
    print(compute_conf_stats(gdf,'decision_noisetest'))

0.9144254278728606
0.9052453468697124


In [20]:
# Confusion-cell counts broken out by gender; the result is indexed by
# (gender, label). This rebinds `confusion`, replacing the overall counts.
confusion = test_df.groupby('gender')['decision_noisetest'].value_counts()
confusion

gender  decision_noisetest
F       TN                    282
        TP                     92
        FP                     26
        FN                      9
M       TN                    393
        TP                    142
        FP                     48
        FN                      8
Name: decision_noisetest, dtype: int64

In [21]:
# Accuracy for the F group, computed by hand from the grouped counts.
(confusion['F','TP'] + confusion['F','TN'])/sum(confusion['F'])

0.9144254278728606

In [22]:
# Accuracy for the M group, computed by hand from the grouped counts.
(confusion['M','TP'] + confusion['M','TN'])/sum(confusion['M'])

0.9052453468697124

Now, we make it correlated with something.

In [23]:
def add_decision_corr(row,corr_col,p_flips):
    """
    Row-wise helper for ``df.apply(..., axis=1)``: noisy copy of 'decision' whose
    noise level depends on another column of the same row.

    The value of ``row[corr_col]`` selects a probability from ``p_flips``. As
    the code is written, that probability is the chance that the returned value
    EQUALS the row's current 'decision'; the opposite binary value is returned
    otherwise.

    Parameters
    ----------
    row : mapping or pandas Series
        Must provide 'decision' and ``corr_col`` entries.
    corr_col : str
        Key of the entry in ``row`` that selects the probability.
    p_flips : mapping
        Maps each possible value of ``row[corr_col]`` to a keep probability;
        a value missing from the mapping raises ``KeyError``.

    Returns
    -------
    The element picked by ``np.random.choice`` from [decision, opposite].
    """
    original = row['decision']
    # opposite label: 0 for a truthy decision, 1 for a falsy one
    flipped = 0 if original else 1
    keep_prob = p_flips[row[corr_col]]
    return np.random.choice([original, flipped], p=[keep_prob, 1 - keep_prob])
    
    

In [24]:
# Per-gender probability that the synthetic prediction agrees with the true
# decision (add_decision_corr treats the looked-up value as the keep probability).
p_gender_corr = {'F':.75,'M':.9}

def decision_gender(r):
    return add_decision_corr(r, 'gender', p_gender_corr)

cur_new = 'decision_gender'
test_df[cur_new] = test_df.apply(decision_gender, axis=1)

In [25]:
# Label every row with its confusion-matrix cell for the column currently named
# by cur_new; the name is resolved when the function is called.
def add_acc_cur(row):
    return add_acc(row, 'decision', cur_new)

test_df[cur_new+'test'] = test_df.apply(add_acc_cur, axis=1)

In [26]:
# Per-gender confusion counts for the label column of the current cur_new.
confusion = test_df.groupby('gender')[cur_new +'test'].value_counts()
confusion

gender  decision_gendertest
F       TN                     237
        TP                      79
        FP                      71
        FN                      22
M       TN                     386
        TP                     136
        FP                      55
        FN                      14
Name: decision_gendertest, dtype: int64

In [27]:
# Accuracy for the F group under the current synthetic prediction.
(confusion['F','TP'] + confusion['F','TN'])/sum(confusion['F'])

0.7726161369193154

In [28]:
# Accuracy for the M group under the current synthetic prediction.
(confusion['M','TP'] + confusion['M','TN'])/sum(confusion['M'])

0.883248730964467

In [29]:
# Confusion counts per (department, gender). Labels with zero rows in a
# subgroup do not appear in the result at all.
test_df.groupby(['department','gender'])[cur_new +'test'].value_counts()


department  gender  decision_gendertest
0           F       TN                      54
                    FP                      17
                    TP                      16
                    FN                       5
            M       TN                      43
                    TP                       7
                    FP                       6
                    FN                       2
1           F       TN                      75
                    FP                      27
                    TP                      11
                    FN                       6
            M       TN                      22
                    FP                       4
                    TP                       1
2           F       TN                      38
                    TP                      25
                    FP                      14
                    FN                       3
            M       TN                      13
                    

In [30]:
# Per-race probability that the synthetic prediction agrees with the true
# decision (add_decision_corr treats the looked-up value as the keep probability).
p_race_corr = {'B':.65,'W':.9,'H':.8}

def decision_race(r):
    return add_decision_corr(r, 'race', p_race_corr)

cur_new = 'decision_race'
test_df[cur_new] = test_df.apply(decision_race, axis=1)
test_df.head()

Unnamed: 0,department,gender,decision,race,decision_noise,decision_noisetest,decision_gender,decision_gendertest,decision_race
0,3,M,1,W,1,TP,1,TP,1
1,3,M,1,H,1,TP,1,TP,1
2,0,F,0,W,0,TN,0,TN,0
3,1,F,0,H,0,TN,1,FP,0
4,3,M,1,W,1,TP,1,TP,1


In [31]:
# Remove the helper confusion-label columns before building the new labeled
# frame. Reassigning instead of inplace=True, together with errors='ignore',
# keeps this cell safe to re-run once the columns are already gone.
test_df = test_df.drop(columns=['decision_noisetest','decision_gendertest'], errors='ignore')

In [32]:
# Metadata of the originally loaded labeled frame, shown for reference before
# building the new one.
labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,categorical,"[groupby, trend]",,
gender,object,categorical,"[groupby, trend]",,
decision,int64,binary,[trend],,
race,object,categorical,"[groupby, trend]",,


In [33]:
# Build a new labeled frame from the augmented DataFrame.
multi_decision_labeled_df = wg.LabeledDataFrame(test_df)


# Roles per variable: the true decision is tagged 'groundtruth' and each
# synthetic decision column 'prediction', in addition to 'trend'.
roles = {'department':['groupby', 'trend'],'gender':['groupby', 'trend'],'race':['groupby', 'trend'],
         'decision_noise':['trend','prediction'],'decision':['trend','groundtruth'],
         'decision_gender':['trend','prediction'],'decision_race':['trend','prediction']}
# Explicit variable types for two columns.
# NOTE(review): presumably these override what infer_var_types() picks
# (department is stored as int64) — confirm against wiggum.
var_types = {'department':'categorical','gender':'categorical'}
# is_count = {c:False for c in test_df.columns}

# Infer types first, then apply the explicit roles and type settings.
multi_decision_labeled_df.infer_var_types()
multi_decision_labeled_df.set_roles(roles)
multi_decision_labeled_df.set_var_types(var_types)

In [34]:
# Inspect the resulting metadata to confirm the roles and variable types.
multi_decision_labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
department,int64,categorical,"[groupby, trend]",,
gender,object,categorical,"[groupby, trend]",,
decision,int64,binary,"[trend, groundtruth]",,
race,object,categorical,"[groupby, trend]",,
decision_noise,int64,binary,"[trend, prediction]",,
decision_gender,int64,binary,"[trend, prediction]",,
decision_race,int64,binary,"[trend, prediction]",,


In [38]:
# Save the labeled frame under the given directory.
# NOTE(review): presumably written as CSV files for data and metadata — confirm against wiggum.
# NOTE(review): this cell's execution count (38) is out of sequence with its
# neighbours; re-run the notebook top to bottom to confirm it works here.
multi_decision_labeled_df.to_csvs('../data/multi_decision_admisions/')

In [35]:
# Instantiate wiggum's three binary-classification trend objects and compute
# them per subgroup; the result table pairs each subgroup value with the
# aggregate value of the same trend.
# NOTE(review): presumably accuracy, true positive rate and positive
# predictive value, going by the class names — confirm against wiggum.
acc_trend = wg.Binary_Accuracy_Trend()
tpr_trend = wg.Binary_TPR_Trend()
ppv_trend = wg.Binary_PPV_Trend()
multi_decision_labeled_df.get_subgroup_trends_1lev([acc_trend,tpr_trend,ppv_trend])

adding decision_decision_noise_acc
adding decision_decision_gender_acc
adding decision_decision_race_acc
adding decision_decision_noise_acc
adding decision_decision_gender_acc
adding decision_decision_race_acc
adding decision_decision_noise_acc
adding decision_decision_gender_acc
adding decision_decision_race_acc


Unnamed: 0,feat1,feat2,subgroup_trend,subgroup,subgroup_trend_strength,group_feat,trend_type,agg_trend,agg_trend_strength
0,decision,decision_noise,0.880000,0,1,department,binary_acc,0.909000,1
1,decision,decision_noise,0.904110,1,1,department,binary_acc,0.909000,1
2,decision,decision_noise,0.918367,2,1,department,binary_acc,0.909000,1
3,decision,decision_noise,0.915842,3,1,department,binary_acc,0.909000,1
4,decision,decision_noise,0.914425,F,1,gender,binary_acc,0.909000,1
5,decision,decision_noise,0.905245,M,1,gender,binary_acc,0.909000,1
6,decision,decision_noise,0.920000,B,1,race,binary_acc,0.909000,1
7,decision,decision_noise,0.913043,H,1,race,binary_acc,0.909000,1
8,decision,decision_noise,0.907006,W,1,race,binary_acc,0.909000,1
9,decision,decision_gender,0.800000,0,1,department,binary_acc,0.838000,1


In [36]:
# Show the subgroup-trend rows flagged at threshold 0.2.
# NOTE(review): in the shown output every returned row has distance > 0.2, and
# distance looks like the relative gap between subgroup and aggregate trend —
# confirm against wiggum's definition.
multi_decision_labeled_df.get_SP_rows(thresh=.2)

Unnamed: 0,feat1,feat2,subgroup_trend,subgroup,subgroup_trend_strength,group_feat,trend_type,agg_trend,agg_trend_strength,distance,SP_thresh0.2
24,decision,decision_race,0.63,B,1,race,binary_acc,0.858,1,0.265734,True
37,decision,decision_gender,0.666667,1,1,department,binary_tpr,0.856574,1,0.221705,True
51,decision,decision_race,0.521739,B,1,race,binary_tpr,0.848606,1,0.385181,True
55,decision,decision_noise,0.566667,1,1,department,binary_ppv,0.75974,1,0.254131,True
63,decision,decision_gender,0.5,0,1,department,binary_ppv,0.630499,1,0.206977,True
64,decision,decision_gender,0.27907,1,1,department,binary_ppv,0.630499,1,0.557382,True
73,decision,decision_race,0.425,1,1,department,binary_ppv,0.671924,1,0.367488,True
78,decision,decision_race,0.315789,B,1,race,binary_ppv,0.671924,1,0.530022,True
79,decision,decision_race,0.521739,H,1,race,binary_ppv,0.671924,1,0.223515,True
