# A Classification Audit: ProPublica

In the first iteration, this will only work on datasets that already include two or more binary classification variables.

We will need additional metadata: role options that mark a variable as either a prediction or a ground truth.


In [1]:
import numpy as np
import pandas as pd
import wiggum as wg


First, we will need a dataset that we can work with.

In [2]:
# Download ProPublica's two-year COMPAS recidivism scores straight from their
# GitHub repository (requires network access). The first line of the file is
# the header row and the `id` column becomes the index.
dataset = (
    pd.read_csv(
        'https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv',
        header=0,
    )
    .set_index('id')
)


In [3]:

def clean_compas(df):
    """Filter the COMPAS data as ProPublica described and add ``length_of_stay``.

    In the original notebook the authors state that there are a number of
    reasons to remove rows because of missing data:

    * If the charge date of a defendant's COMPAS-scored crime was not within
      30 days of when the person was arrested, we assume that, for data
      quality reasons, we do not have the right offense.
    * The recidivist flag -- ``is_recid`` -- was coded as -1 if no COMPAS
      case could be found at all.
    * In a similar vein, ordinary traffic offenses -- those with a
      ``c_charge_degree`` of 'O' -- will not result in jail time and are
      removed (only two of them).
    * The underlying data from Broward County was filtered to include only
      those rows representing people who had either recidivated in two
      years, or had at least two years outside of a correctional facility.

    Rows without a text score (``score_text`` missing or 'N/A') are removed
    as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``days_b_screening_arrest``, ``is_recid``,
        ``c_charge_degree``, ``score_text``, ``c_jail_in`` and ``c_jail_out``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows that pass every filter, with an
        extra ``length_of_stay`` column: the whole days between ``c_jail_in``
        and ``c_jail_out`` (NaN where either timestamp is missing). The input
        frame is not modified.

    The number of removed rows and the resulting column list are printed.
    """
    rows_start = len(df)

    # `keep` is a boolean mask over rows; a NaN in any compared column
    # evaluates to False for the range checks and therefore drops the row.
    keep = (
        # arrest within 30 days (either side) of the COMPAS screening
        (df['days_b_screening_arrest'] <= 30)
        & (df['days_b_screening_arrest'] >= -30)
        # a COMPAS case was found
        & (df['is_recid'] != -1)
        # not an ordinary traffic offense
        & (df['c_charge_degree'] != "O")
        # a text score is available; pandas.read_csv turns the literal 'N/A'
        # into NaN by default, so both spellings have to be excluded
        & (df['score_text'] != 'N/A')
        & df['score_text'].notna()
    )

    # Take an explicit copy so that adding a column below writes to an
    # independent frame instead of raising SettingWithCopyWarning.
    df = df.loc[keep, :].copy()

    # create new attribute "length of stay" with total jail time (whole days).
    jail_time = pd.to_datetime(df['c_jail_out']) - pd.to_datetime(df['c_jail_in'])
    df['length_of_stay'] = jail_time.dt.days

    # print number of rows
    print('Number of rows removed: '+str(rows_start - len(df)))
    # print list of features again
    print('Features: '+str(list(df)))
    return df

# Rebind `dataset` to the cleaned frame; the raw download is no longer
# reachable under this name after the cell has run.
dataset = clean_compas(dataset)

Number of rows removed: 1042
Features: ['name', 'first', 'last', 'compas_screening_date', 'sex', 'dob', 'age', 'age_cat', 'race', 'juv_fel_count', 'decile_score', 'juv_misd_count', 'juv_other_count', 'priors_count', 'days_b_screening_arrest', 'c_jail_in', 'c_jail_out', 'c_case_number', 'c_offense_date', 'c_arrest_date', 'c_days_from_compas', 'c_charge_degree', 'c_charge_desc', 'is_recid', 'r_case_number', 'r_charge_degree', 'r_days_from_arrest', 'r_offense_date', 'r_charge_desc', 'r_jail_in', 'r_jail_out', 'violent_recid', 'is_violent_recid', 'vr_case_number', 'vr_charge_degree', 'vr_offense_date', 'vr_charge_desc', 'type_of_assessment', 'decile_score.1', 'score_text', 'screening_date', 'v_type_of_assessment', 'v_decile_score', 'v_score_text', 'v_screening_date', 'in_custody', 'out_custody', 'priors_count.1', 'start', 'end', 'event', 'two_year_recid', 'length_of_stay']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [4]:
def corels_rule(row):
    """Return the 0/1 prediction of a simple two-feature rule for one row.

    The rule fires (returns 1) when ``row['priors_count']`` exceeds 3, or
    when it is at most 3 and ``row['age']`` is at most 22; otherwise it
    returns 0. ``row`` only needs to support lookup by those two keys.
    """
    n_priors = row['priors_count']
    if n_priors > 3:
        return 1
    # The explicit `<= 3` test is kept so that a missing (NaN) prior count
    # fails both branches and yields 0.
    young_with_few_priors = n_priors <= 3 and row['age'] <= 22
    return int(young_with_few_priors)

# Evaluate the rule once per row (axis=1) and store the 0/1 result as the
# `corels` column.
dataset['corels'] = dataset.apply(corels_rule,axis=1)

In [5]:
# Binarize the COMPAS decile score at three cut-offs: each new column is 1
# where decile_score >= threshold and 0 otherwise. The comparison is done on
# the whole column at once rather than element by element in Python.
for threshold in (6, 7, 8):
    dataset[f'decile_score_pred{threshold}'] = (dataset['decile_score'] >= threshold).astype(int)

# Wrap the augmented frame so wiggum metadata (roles, variable types) can be
# attached to it in the following cells.
labeled_df = wg.LabeledDataFrame(dataset)

In [6]:
labeled_df.df.head()

variable,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,juv_fel_count,...,priors_count.1,start,end,event,two_year_recid,length_of_stay,corels,decile_score_pred6,decile_score_pred7,decile_score_pred8
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,0,...,0,0,327,0,0,0,0,0,0,0
3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,0,...,0,9,159,1,1,10,0,0,0,0
4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,0,...,4,0,63,0,1,1,1,0,0,0
7,marsha miles,marsha,miles,2013-11-30,Male,1971-08-22,44,25 - 45,Other,0,...,0,1,853,0,0,1,0,0,0,0
8,edward riddle,edward,riddle,2014-02-19,Male,1974-07-23,41,25 - 45,Caucasian,0,...,14,5,40,1,1,6,1,1,0,0


To test, we need to create additional decision columns. To make it interesting, we will make them correlated with the true decision and with race, gender, or dept.  

We will start by modifying only the dataframe.  We will add the metadata and make a new labeled df after. 

In [7]:
# Let wiggum infer a variable type for every column, then list the columns
# held by the labeled frame.
# NOTE(review): presumably this fills the `var_type` column of
# labeled_df.meta_df — confirm against the wiggum documentation.
labeled_df.infer_var_types()
labeled_df.df.columns

Index(['name', 'first', 'last', 'compas_screening_date', 'sex', 'dob', 'age',
       'age_cat', 'race', 'juv_fel_count', 'decile_score', 'juv_misd_count',
       'juv_other_count', 'priors_count', 'days_b_screening_arrest',
       'c_jail_in', 'c_jail_out', 'c_case_number', 'c_offense_date',
       'c_arrest_date', 'c_days_from_compas', 'c_charge_degree',
       'c_charge_desc', 'is_recid', 'r_case_number', 'r_charge_degree',
       'r_days_from_arrest', 'r_offense_date', 'r_charge_desc', 'r_jail_in',
       'r_jail_out', 'violent_recid', 'is_violent_recid', 'vr_case_number',
       'vr_charge_degree', 'vr_offense_date', 'vr_charge_desc',
       'type_of_assessment', 'decile_score.1', 'score_text', 'screening_date',
       'v_type_of_assessment', 'v_decile_score', 'v_score_text',
       'v_screening_date', 'in_custody', 'out_custody', 'priors_count.1',
       'start', 'end', 'event', 'two_year_recid', 'length_of_stay', 'corels',
       'decile_score_pred6', 'decile_score_pred7', 'decil

In [8]:
labeled_df.df.dtypes

variable
name                        object
first                       object
last                        object
compas_screening_date       object
sex                         object
dob                         object
age                          int64
age_cat                     object
race                        object
juv_fel_count                int64
decile_score                 int64
juv_misd_count               int64
juv_other_count              int64
priors_count                 int64
days_b_screening_arrest    float64
c_jail_in                   object
c_jail_out                  object
c_case_number               object
c_offense_date              object
c_arrest_date               object
c_days_from_compas         float64
c_charge_degree             object
c_charge_desc               object
is_recid                     int64
r_case_number               object
r_charge_degree             object
r_days_from_arrest         float64
r_offense_date              object
r_charge_de

In [9]:
labeled_df.df.head()

variable,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,juv_fel_count,...,priors_count.1,start,end,event,two_year_recid,length_of_stay,corels,decile_score_pred6,decile_score_pred7,decile_score_pred8
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,0,...,0,0,327,0,0,0,0,0,0,0
3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,0,...,0,9,159,1,1,10,0,0,0,0
4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,0,...,4,0,63,0,1,1,1,0,0,0
7,marsha miles,marsha,miles,2013-11-30,Male,1971-08-22,44,25 - 45,Other,0,...,0,1,853,0,0,1,0,0,0,0
8,edward riddle,edward,riddle,2014-02-19,Male,1974-07-23,41,25 - 45,Caucasian,0,...,14,5,40,1,1,6,1,1,0,0


In [10]:

# Role of every column in the audit. A value is either a single role or a
# list of roles; the roles used here are 'ignore', 'trend', 'groupby',
# 'groundtruth' and 'prediction'. Entries are kept in dataframe column order.
roles = {
    # identifying and demographic fields
    'name': 'ignore',
    'first': 'ignore',
    'last': 'ignore',
    'compas_screening_date': 'ignore',
    'sex': ['groupby', 'trend'],
    'dob': 'ignore',
    'age': 'trend',
    'age_cat': ['groupby', 'trend'],
    'race': ['groupby', 'trend'],
    # criminal history and COMPAS score
    'juv_fel_count': 'ignore',
    'decile_score': 'trend',
    'juv_misd_count': 'ignore',
    'juv_other_count': 'ignore',
    'priors_count': 'trend',
    'days_b_screening_arrest': 'ignore',
    # current charge (c_*)
    'c_jail_in': 'ignore',
    'c_jail_out': 'ignore',
    'c_case_number': 'ignore',
    'c_offense_date': 'ignore',
    'c_arrest_date': 'ignore',
    'c_days_from_compas': 'ignore',
    'c_charge_degree': 'ignore',
    'c_charge_desc': 'ignore',
    # recidivism charge (r_*)
    'is_recid': 'ignore',
    'r_case_number': 'ignore',
    'r_charge_degree': 'ignore',
    'r_days_from_arrest': 'ignore',
    'r_offense_date': 'ignore',
    'r_charge_desc': 'ignore',
    'r_jail_in': 'ignore',
    'r_jail_out': 'ignore',
    # violent recidivism charge (vr_*)
    'violent_recid': 'ignore',
    'is_violent_recid': 'ignore',
    'vr_case_number': 'ignore',
    'vr_charge_degree': 'ignore',
    'vr_offense_date': 'ignore',
    'vr_charge_desc': 'ignore',
    # assessment fields
    'type_of_assessment': 'groupby',
    'decile_score.1': 'trend',
    'score_text': 'groupby',
    'screening_date': 'ignore',
    'v_type_of_assessment': 'groupby',
    'v_decile_score': 'trend',
    'v_score_text': 'ignore',
    'v_screening_date': 'ignore',
    # custody and follow-up bookkeeping
    'in_custody': 'ignore',
    'out_custody': 'ignore',
    'priors_count.1': 'ignore',
    'start': 'ignore',
    'end': 'ignore',
    'event': 'ignore',
    # ground truth and the binary predictions audited against it
    'two_year_recid': ['trend', 'groundtruth'],
    'decile_score_pred6': ['trend', 'prediction'],
    'decile_score_pred7': ['trend', 'prediction'],
    'decile_score_pred8': ['trend', 'prediction'],
    'corels': ['trend', 'prediction'],
    'length_of_stay': 'ignore',
}

# Explicit variable types for two columns; every other column keeps whatever
# type is already recorded in the metadata.
var_types = {
    'sex': 'categorical',
    'age': 'continuous',
}
# The isCount metadata is not set in this notebook.

labeled_df.set_roles(roles)
labeled_df.set_var_types(var_types)
labeled_df.meta_df

Unnamed: 0_level_0,dtype,var_type,role,isCount,weighting_var
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
name,object,categorical,ignore,,
first,object,categorical,ignore,,
last,object,categorical,ignore,,
compas_screening_date,object,categorical,ignore,,
sex,object,categorical,"[groupby, trend]",,
dob,object,categorical,ignore,,
age,int64,continuous,trend,,
age_cat,object,categorical,"[groupby, trend]",,
race,object,categorical,"[groupby, trend]",,
juv_fel_count,int64,ordinal,ignore,,


In [11]:
# Save the labeled dataset under ../data/propublica_multi/ (relative to the
# notebook's working directory). This runs before the intersectional columns
# and trend results are computed in the cells below.
# NOTE(review): presumably writes the data and its metadata as CSV files —
# confirm what to_csvs emits and whether the directory must already exist.
labeled_df.to_csvs('../data/propublica_multi/')

In [12]:
# Add one combined grouping column per pair of the listed variables; the
# displayed frame gains race_sex, race_age_cat and sex_age_cat, whose values
# join the two source values with an underscore (e.g. 'Other_Male').
labeled_df.add_intersectional(var_list = ['race','sex','age_cat'])

variable,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,juv_fel_count,...,event,two_year_recid,length_of_stay,corels,decile_score_pred6,decile_score_pred7,decile_score_pred8,race_sex,race_age_cat,sex_age_cat
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,0,...,0,0,0,0,0,0,0,Other_Male,Other_Greater than 45,Male_Greater than 45
3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,0,...,1,1,10,0,0,0,0,African-American_Male,African-American_25 - 45,Male_25 - 45
4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,0,...,0,1,1,1,0,0,0,African-American_Male,African-American_Less than 25,Male_Less than 25
7,marsha miles,marsha,miles,2013-11-30,Male,1971-08-22,44,25 - 45,Other,0,...,0,0,1,0,0,0,0,Other_Male,Other_25 - 45,Male_25 - 45
8,edward riddle,edward,riddle,2014-02-19,Male,1974-07-23,41,25 - 45,Caucasian,0,...,1,1,6,1,1,0,0,Caucasian_Male,Caucasian_25 - 45,Male_25 - 45
9,steven stewart,steven,stewart,2013-08-30,Male,1973-02-25,43,25 - 45,Other,0,...,0,0,0,0,0,0,0,Other_Male,Other_25 - 45,Male_25 - 45
10,elizabeth thieme,elizabeth,thieme,2014-03-16,Female,1976-06-03,39,25 - 45,Caucasian,0,...,0,0,2,0,0,0,0,Caucasian_Female,Caucasian_25 - 45,Female_25 - 45
14,benjamin franc,benjamin,franc,2013-11-26,Male,1988-06-01,27,25 - 45,Caucasian,0,...,0,0,1,0,0,0,0,Caucasian_Male,Caucasian_25 - 45,Male_25 - 45
15,ellyaher lanza,ellyaher,lanza,2013-10-03,Male,1992-08-18,23,Less than 25,African-American,0,...,1,1,4,0,1,0,0,African-American_Male,African-American_Less than 25,Male_Less than 25
16,kortney coleman,kortney,coleman,2013-01-01,Female,1978-08-22,37,25 - 45,Caucasian,0,...,0,0,0,0,0,0,0,Caucasian_Female,Caucasian_25 - 45,Female_25 - 45


In [13]:
# One wiggum trend object per binary-classification statistic, created in the
# same order in which they are passed to the computation below.
acc_trend = wg.Binary_Accuracy_Trend()
tpr_trend = wg.Binary_TPR_Trend()
ppv_trend = wg.Binary_PPV_Trend()
tnr_trend = wg.Binary_TNR_Trend()
fdr_trend = wg.Binary_FDR_Trend()
f1_trend = wg.Binary_F1_Trend()
fnr_trend = wg.Binary_FNR_Trend()
err_trend = wg.Binary_Error_Trend()

# Compute every statistic per subgroup and compare subgroups pairwise; the
# returned table is displayed as the cell output.
# NOTE(review): replace=True presumably discards any previously stored
# results — confirm against get_pairwise_trends_1lev.
labeled_df.get_pairwise_trends_1lev(
    [
        acc_trend,
        tpr_trend,
        ppv_trend,
        tnr_trend,
        fdr_trend,
        f1_trend,
        fnr_trend,
        err_trend,
    ],
    replace=True,
)


  'ppv': lambda c: c['TP']/(c['TP'] + c['FP']),
  'tnr': lambda c: c['TN']/(c['TN'] + c['FP']),
  'fdr': lambda c: c['FP']/(c['TP'] + c['FP']),


Index(['feat1', 'feat2', 'subgroup_trend', 'subgroup',
       'subgroup_trend_strength', 'group_feat', 'trend_type'],
      dtype='object')
Index(['feat1', 'feat2', 'group_feat', 'subgroup', 'subgroup_trend',
       'subgroup_trend_strength', 'trend_type'],
      dtype='object')
1664
18048
Index(['feat1', 'feat2', 'group_feat', 'subgroup', 'subgroup_trend',
       'subgroup_trend_strength', 'trend_type', 'subgroup2', 'subgroup_trend2',
       'subgroup_trend_strength2'],
      dtype='object')
8192
7870


Unnamed: 0,feat1,feat2,group_feat,subgroup,subgroup_trend,subgroup_trend_strength,trend_type,subgroup2,subgroup_trend2,subgroup_trend_strength2,comparison_type
1,two_year_recid,corels,sex,Female,0.677447,0.674271,binary_acc,Male,0.660997,0.729635,pairwise
5,two_year_recid,decile_score_pred6,sex,Female,0.683404,0.674271,binary_acc,Male,0.659796,0.729635,pairwise
9,two_year_recid,decile_score_pred7,sex,Female,0.688511,0.674271,binary_acc,Male,0.641185,0.729635,pairwise
13,two_year_recid,decile_score_pred8,sex,Female,0.686809,0.674271,binary_acc,Male,0.621573,0.729635,pairwise
17,two_year_recid,corels,age_cat,25 - 45,0.672707,0.718153,binary_acc,Greater than 45,0.709977,0.678622,pairwise
18,two_year_recid,corels,age_cat,25 - 45,0.672707,0.718153,binary_acc,Less than 25,0.597624,0.680447,pairwise
21,two_year_recid,corels,age_cat,Greater than 45,0.709977,0.678622,binary_acc,Less than 25,0.597624,0.680447,pairwise
26,two_year_recid,decile_score_pred6,age_cat,25 - 45,0.664213,0.718153,binary_acc,Greater than 45,0.731632,0.678622,pairwise
27,two_year_recid,decile_score_pred6,age_cat,25 - 45,0.664213,0.718153,binary_acc,Less than 25,0.599852,0.680447,pairwise
30,two_year_recid,decile_score_pred6,age_cat,Greater than 45,0.731632,0.678622,binary_acc,Less than 25,0.599852,0.680447,pairwise


In [14]:
len(labeled_df.result_df)

7870

In [21]:
# Show only the true-negative-rate (binary_tnr) comparisons.
# NOTE(review): the saved output of this cell already contains a `distance`
# column, which is only added by the add_distance call further down, and the
# cell carries execution count 21 — it was run out of order. After a fresh
# Restart & Run All the column would presumably be missing here.
labeled_df.get_trend_rows(trend_type='binary_tnr')

848  total rows meet the criteria


Unnamed: 0,feat1,feat2,group_feat,subgroup,subgroup_trend,subgroup_trend_strength,trend_type,subgroup2,subgroup_trend2,subgroup_trend_strength2,comparison_type,distance
6769,two_year_recid,corels,sex,Female,0.795276,0.674271,binary_tnr,Male,0.716263,0.729635,pairwise,0.079013
6773,two_year_recid,decile_score_pred6,sex,Female,0.797900,0.674271,binary_tnr,Male,0.783929,0.729635,pairwise,0.013971
6777,two_year_recid,decile_score_pred7,sex,Female,0.876640,0.674271,binary_tnr,Male,0.845829,0.729635,pairwise,0.030812
6781,two_year_recid,decile_score_pred8,sex,Female,0.930446,0.674271,binary_tnr,Male,0.906190,0.729635,pairwise,0.024256
6785,two_year_recid,corels,age_cat,25 - 45,0.790058,0.718153,binary_tnr,Greater than 45,0.796359,0.678622,pairwise,0.006301
6786,two_year_recid,corels,age_cat,25 - 45,0.790058,0.718153,binary_tnr,Less than 25,0.463744,0.680447,pairwise,0.326314
6789,two_year_recid,corels,age_cat,Greater than 45,0.796359,0.678622,binary_tnr,Less than 25,0.463744,0.680447,pairwise,0.332616
6794,two_year_recid,decile_score_pred6,age_cat,25 - 45,0.788472,0.718153,binary_tnr,Greater than 45,0.913538,0.678622,pairwise,0.125066
6795,two_year_recid,decile_score_pred6,age_cat,25 - 45,0.788472,0.718153,binary_tnr,Less than 25,0.595278,0.680447,pairwise,0.193193
6798,two_year_recid,decile_score_pred6,age_cat,Greater than 45,0.913538,0.678622,binary_tnr,Less than 25,0.595278,0.680447,pairwise,0.318260


In [15]:
# Positional filters; in the displayed rows they match the feat1, feat2 and
# group_feat columns respectively.
labeled_df.get_trend_rows('two_year_recid','decile_score_pred6','race')

120  total rows meet the criteria


Unnamed: 0,feat1,feat2,group_feat,subgroup,subgroup_trend,subgroup_trend_strength,trend_type,subgroup2,subgroup_trend2,subgroup_trend_strength2,comparison_type
89,two_year_recid,decile_score_pred6,race,African-American,0.651339,0.714428,binary_acc,Asian,0.806452,0.329472,pairwise
90,two_year_recid,decile_score_pred6,race,African-American,0.651339,0.714428,binary_acc,Caucasian,0.680456,0.699053,pairwise
91,two_year_recid,decile_score_pred6,race,African-American,0.651339,0.714428,binary_acc,Hispanic,0.660118,0.630549,pairwise
92,two_year_recid,decile_score_pred6,race,African-American,0.651339,0.714428,binary_acc,Native American,0.727273,0.039747,pairwise
93,two_year_recid,decile_score_pred6,race,African-American,0.651339,0.714428,binary_acc,Other,0.676385,0.605568,pairwise
96,two_year_recid,decile_score_pred6,race,Asian,0.806452,0.329472,binary_acc,Caucasian,0.680456,0.699053,pairwise
97,two_year_recid,decile_score_pred6,race,Asian,0.806452,0.329472,binary_acc,Hispanic,0.660118,0.630549,pairwise
98,two_year_recid,decile_score_pred6,race,Asian,0.806452,0.329472,binary_acc,Native American,0.727273,0.039747,pairwise
99,two_year_recid,decile_score_pred6,race,Asian,0.806452,0.329472,binary_acc,Other,0.676385,0.605568,pairwise
103,two_year_recid,decile_score_pred6,race,Caucasian,0.680456,0.699053,binary_acc,Hispanic,0.660118,0.630549,pairwise


In [16]:
# Add a `distance` column to the result table; in the rows displayed by the
# following cells it equals |subgroup_trend - subgroup_trend2|.
# NOTE(review): the meaning of the third positional argument (True) is not
# visible in this notebook — confirm against wiggum's add_distance.
labeled_df.add_distance('subgroup','subgroup2',True)

In [24]:
# Rank the true-negative-rate gaps between the African-American and Caucasian
# subgroups, largest first. Relies on the `distance` column added by
# add_distance.
(
    labeled_df
    .get_trend_rows(
        subgroup='African-American',
        subgroup2='Caucasian',
        trend_type='binary_tnr',
    )
    .sort_values(by='distance', ascending=False)
)

4  total rows meet the criteria


Unnamed: 0,feat1,feat2,group_feat,subgroup,subgroup_trend,subgroup_trend_strength,trend_type,subgroup2,subgroup_trend2,subgroup_trend_strength2,comparison_type,distance
6858,two_year_recid,decile_score_pred6,race,African-American,0.685601,0.714428,binary_tnr,Caucasian,0.864949,0.699053,pairwise,0.179348
6822,two_year_recid,corels,race,African-American,0.638705,0.714428,binary_tnr,Caucasian,0.803279,0.699053,pairwise,0.164573
6894,two_year_recid,decile_score_pred7,race,African-American,0.772127,0.714428,binary_tnr,Caucasian,0.917252,0.699053,pairwise,0.145125
6930,two_year_recid,decile_score_pred8,race,African-American,0.860634,0.714428,binary_tnr,Caucasian,0.952381,0.699053,pairwise,0.091747


In [18]:
labeled_df.get_trend_rows(subgroup='Caucasian')

96  total rows meet the criteria


Unnamed: 0,feat1,feat2,group_feat,subgroup,subgroup_trend,subgroup_trend_strength,trend_type,subgroup2,subgroup_trend2,subgroup_trend_strength2,comparison_type,distance
67,two_year_recid,corels,race,Caucasian,0.667618,0.699053,binary_acc,Hispanic,0.689587,0.630549,pairwise,0.021970
68,two_year_recid,corels,race,Caucasian,0.667618,0.699053,binary_acc,Native American,0.909091,0.039747,pairwise,0.241473
69,two_year_recid,corels,race,Caucasian,0.667618,0.699053,binary_acc,Other,0.725948,0.605568,pairwise,0.058330
103,two_year_recid,decile_score_pred6,race,Caucasian,0.680456,0.699053,binary_acc,Hispanic,0.660118,0.630549,pairwise,0.020339
104,two_year_recid,decile_score_pred6,race,Caucasian,0.680456,0.699053,binary_acc,Native American,0.727273,0.039747,pairwise,0.046816
105,two_year_recid,decile_score_pred6,race,Caucasian,0.680456,0.699053,binary_acc,Other,0.676385,0.605568,pairwise,0.004072
139,two_year_recid,decile_score_pred7,race,Caucasian,0.668093,0.699053,binary_acc,Hispanic,0.642436,0.630549,pairwise,0.025657
140,two_year_recid,decile_score_pred7,race,Caucasian,0.668093,0.699053,binary_acc,Native American,0.727273,0.039747,pairwise,0.059180
141,two_year_recid,decile_score_pred7,race,Caucasian,0.668093,0.699053,binary_acc,Other,0.688047,0.605568,pairwise,0.019953
175,two_year_recid,decile_score_pred8,race,Caucasian,0.657156,0.699053,binary_acc,Hispanic,0.642436,0.630549,pairwise,0.014720


In [19]:
# Count the per-row confusion-matrix outcomes (TN/TP/FN/FP) stored for the
# two_year_recid vs. decile_score_pred6 pair.
# NOTE(review): this column is not created by any code in the notebook
# itself — presumably it is added to labeled_df.df by the binary trend
# computation; confirm.
labeled_df.df['two_year_recid_decile_score_pred6_acc'].value_counts()

TN    2647
TP    1453
FN    1356
FP     716
Name: two_year_recid_decile_score_pred6_acc, dtype: int64