In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import random
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# src files
from situation_testing.situation_testing import SituationTesting
# plot params
# 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in matplotlib 3.6 and the old
# name was later removed, so use whichever name this matplotlib version provides.
plt.style.use(next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid') if name in plt.style.available),
    'default',
))
# text sizes
plt.rc('font', size=11)
plt.rc('legend', fontsize=11)
# line and axes frame styling
plt.rc('lines', linewidth=2)
plt.rc('axes', linewidth=2)
plt.rc('axes', edgecolor='k')
# major tick marks
plt.rc('xtick.major', width=2)
plt.rc('xtick.major', size=6)
plt.rc('ytick.major', width=2)
plt.rc('ytick.major', size=6)
# font type 42 for the pdf and ps backends
plt.rc('pdf', fonttype=42)
plt.rc('ps', fonttype=42)

In [2]:
# working directory: parent of the directory the notebook is run from
wd = os.path.dirname(os.getcwd())
# relevant folders; joining with '' appends the platform's own path separator
# (instead of a hard-coded Windows '\\'), so later `data_path + <file>` keeps working
data_path = os.path.join(os.path.abspath(os.path.join(wd, 'data')), '')
resu_path = os.path.join(os.path.abspath(os.path.join(wd, 'results')), '')

In [3]:
# factual data: read once and shared by all SCF versions below
org_df = pd.read_csv(data_path + 'clean_LawSchool.csv', sep='|')
org_df = org_df.reset_index(drop=True)
# quick look at the size and the available columns
print(org_df.shape, org_df.columns.to_list(), sep='\n')
org_df.head(5)

(21790, 7)
['race', 'sex', 'LSAT', 'UGPA', 'ZFYA', 'race_nonwhite', 'race_simpler']


Unnamed: 0,race,sex,LSAT,UGPA,ZFYA,race_nonwhite,race_simpler
0,White,Female,39.0,3.1,-0.98,White,White
1,White,Female,36.0,3.0,0.09,White,White
2,White,Male,30.0,3.1,-0.35,White,White
3,Hispanic,Male,39.0,2.2,0.58,NonWhite,Latino
4,White,Female,37.0,3.4,-1.26,White,White


In [4]:
# we focus on sex and race_nonwhite, kept as strings (no 0/1 encoding),
# and rename them to Gender / Race
df = (
    org_df[['sex', 'race_nonwhite', 'LSAT', 'UGPA']]
    .copy()
    .rename(columns={'sex': 'Gender', 'race_nonwhite': 'Race'})
)
df.head(5)

Unnamed: 0,Gender,Race,LSAT,UGPA
0,Female,White,39.0,3.1
1,Female,White,36.0,3.0
2,Male,White,30.0,3.1
3,Male,NonWhite,39.0,2.2
4,Female,White,37.0,3.4


### A 'known' decision maker

To frame it as a (discrete) decision-making process, we include an *admissions officer* based on [the "known" requirements of US Law Schools](https://schools.lawschoolnumbers.com/). We assume the case of Yale Law School. We could not find a one-to-one converter between the LSAT scored out of 48 (as in our data) and the LSAT scored out of 180. Since 173/180 is about 96%, the equivalent would be about 46.1/48. Let's assume Yale cuts at these median values (a UGPA of 3.93 and an LSAT of 46.1), and puts a slightly higher weight on UGPA than on LSAT.

In [5]:
# Our decision maker: a weighted sum of UGPA (weight b1) and LSAT (weight b2)
b1 = 0.6
b2 = 0.4
# admission cut-off: the score of an applicant with UGPA 3.93 and LSAT 46.1
min_score = round(b1*3.93 + b2*46.1, 2)   # 20.8
# highest attainable score (UGPA 4.00, LSAT 48); rounded to 2 decimals like min_score,
# since rounding to an integer would give 22, above what any applicant can reach
max_score = round(b1*4.00 + b2*48.00, 2)  # 21.6

In [6]:
# apply the decision maker: weighted score, then Y=1 when the score reaches min_score
df['Score'] = df['UGPA'] * b1 + df['LSAT'] * b2
df['Y'] = np.where(df['Score'].ge(min_score), 1, 0)
df.head(5)

Unnamed: 0,Gender,Race,LSAT,UGPA,Score,Y
0,Female,White,39.0,3.1,17.46,0
1,Female,White,36.0,3.0,16.2,0
2,Male,White,30.0,3.1,13.86,0
3,Male,NonWhite,39.0,2.2,16.92,0
4,Female,White,37.0,3.4,16.84,0


### $|\mathbf{A}=1|$: Gender

We focus on the Level 3 SCFs.

In [7]:
# counterfactual data under the intervention do:=Male
do = 'Male'
# build the path with os.path.join rather than hard-coded '\\' separators
org_cf_df = pd.read_csv(
    os.path.join(data_path, 'counterfactuals', f'cf_LawSchool_lev3_do{do}.csv'),
    sep='|',
).reset_index(drop=True)
print(org_cf_df.shape)
print(org_cf_df.columns.to_list())
org_cf_df.head(5)

(21790, 8)
['female', 'nonwhite', 'Sex', 'Race', 'resid_LSAT', 'resid_UGPA', 'scf_LSAT', 'scf_UGPA']


Unnamed: 0,female,nonwhite,Sex,Race,resid_LSAT,resid_UGPA,scf_LSAT,scf_UGPA
0,0,0,Female,White,1.823899,-0.232222,39.609,2.975
1,0,0,Female,White,-1.176101,-0.332222,36.609,2.875
2,0,0,Male,White,-7.784813,-0.107057,30.0,3.1
3,0,1,Male,NonWhite,5.853746,-0.788069,39.0,2.2
4,0,0,Female,White,-0.176101,0.067778,37.609,3.275


In [8]:
# keep the protected attributes and the counterfactual (scf_*) features,
# renamed so the columns match the factual frame; attributes stay as strings
cf_df = (
    org_cf_df[['Sex', 'Race', 'scf_LSAT', 'scf_UGPA']]
    .copy()
    .rename(columns={'Sex': 'Gender', 'scf_LSAT': 'LSAT', 'scf_UGPA': 'UGPA'})
)

# apply the same decision maker to the counterfactual features
cf_df['Score'] = cf_df['UGPA'] * b1 + cf_df['LSAT'] * b2
cf_df['Y'] = np.where(cf_df['Score'].ge(min_score), 1, 0)
cf_df.head(5)

Unnamed: 0,Gender,Race,LSAT,UGPA,Score,Y
0,Female,White,39.609,2.975,17.6286,0
1,Female,White,36.609,2.875,16.3686,0
2,Male,White,30.0,3.1,13.86,0
3,Male,NonWhite,39.0,2.2,16.92,0
4,Female,White,37.609,3.275,17.0086,0


In [9]:
# do:=Male results: factual decision Y next to the counterfactual decision cf_Y
m_res_df = df[['Gender', 'Race', 'Y']].copy()
m_res_df['cf_Y'] = cf_df['Y'].copy()
m_res_df.head(5)

Unnamed: 0,Gender,Race,Y,cf_Y
0,Female,White,0,0
1,Female,White,0,0
2,Male,White,0,0
3,Male,NonWhite,0,0
4,Female,White,0,0


In [10]:
# share (in %) of Female and of NonWhite rows in the data
print(len(df[df['Gender'] == 'Female']) / len(df) * 100)
print(len(df[df['Race'] == 'NonWhite']) / len(df) * 100)

43.76778338687471
16.08994951812758


In [11]:
# share (in %) of positive decisions (Y == 1) within each gender ...
print(len(df[(df['Y'] == 1) & (df['Gender'] == 'Female')]) / len(df[df['Gender'] == 'Female']) * 100)
print(len(df[(df['Y'] == 1) & (df['Gender'] == 'Male')]) / len(df[df['Gender'] == 'Male']) * 100)

# ... and within each race group
print(len(df[(df['Y'] == 1) & (df['Race'] == 'NonWhite')]) / len(df[df['Race'] == 'NonWhite']) * 100)
print(len(df[(df['Y'] == 1) & (df['Race'] == 'White')]) / len(df[df['Race'] == 'White']) * 100)

1.887385970430953
2.6524116542887457
0.9412435824301199
2.5814920148763947


In [12]:
# --- attribute-specific params
# target feature and the values encoding its positive / negative outcome
feat_trgt, feat_trgt_vals = 'Y', dict(positive=1, negative=0)
# list of relevant features
feat_rlvt = ['LSAT', 'UGPA']
# protected feature and its values: use 'non_protected' and 'protected' accordingly
feat_prot, feat_prot_vals = 'Gender', dict(non_protected='Male', protected='Female')

# --- st-specific params
# n: size of the neighborhoods; alpha: significance level; tau: deviation
n, alpha, tau = 15, 0.05, 0.0

#### Standard ST (stST)

Note that, by construction, ST ignores any attribute, such as 'Race' (or 'Gender'), that is not specified as a relevant feature.

In [13]:
# standard situation testing: only the factual data is passed to the baseline
test_df = df.copy()

st = SituationTesting()
st.setup_baseline(test_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Gender'])

m_res_df['stST'] = st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
)

In [14]:
# number of individuals with stST above tau
len(m_res_df[m_res_df['stST'] > tau])

77

In [15]:
# positive discrimination? number of individuals with stST below tau
len(m_res_df[m_res_df['stST'] < tau])

44

#### Counterfactual Situation Testing (cfST)

In [16]:
# counterfactual situation testing: factual and counterfactual data, centers excluded
test_df = df.copy()
test_cf_df = cf_df.copy()

cf_st = SituationTesting()
cf_st.setup_baseline(test_df, test_cf_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Gender'])

m_res_df['cfST'] = cf_st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
    include_centers=False,  # don't include the centers
)

In [17]:
# number of individuals with cfST above tau
len(m_res_df[m_res_df['cfST'] > tau])

78

In [18]:
# positive discrimination? number of individuals with cfST below tau
len(m_res_df[m_res_df['cfST'] < tau])

57

#### Counterfactual Fairness

In [19]:
# counterfactual-fairness result exposed by the cfST object of the run without centers
m_res_df['CF'] = cf_st.res_counterfactual_unfairness

In [20]:
# number of individuals whose CF value is 1
len(m_res_df[m_res_df['CF'] == 1])

56

In [21]:
# positive discrimination? number of individuals whose CF value is 2
len(m_res_df[m_res_df['CF'] == 2])

1

#### cfST with centers

In [22]:
# counterfactual situation testing again, this time with the centers included
test_df = df.copy()
test_cf_df = cf_df.copy()

wcf_st = SituationTesting()
wcf_st.setup_baseline(test_df, test_cf_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Gender'])

m_res_df['cfST_w'] = wcf_st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
    include_centers=True,  # include the centers
)

In [23]:
# number of individuals with cfST_w above tau
len(m_res_df[m_res_df['cfST_w'] > tau])

99

In [24]:
# positive discrimination? number of individuals with cfST_w below tau
len(m_res_df[m_res_df['cfST_w'] < tau])

42

### Add CIs for CF

In [25]:
# per-individual test table from the cfST run with centers
# (the displayed columns include p1, p2, diff and CIs)
df_wcf_sf_disc = wcf_st.get_test_discrimination()
df_wcf_sf_disc.head(5)

Unnamed: 0,individual,p1,p2,org_diff,d_alpha,diff,CIs,cfST
0,0,1.0,1.0,0.0,0.0,0.0,"[0.0, 0.0]",No
1,1,1.0,1.0,0.0,0.0,0.0,"[0.0, 0.0]",No
2,4,1.0,1.0,0.0,0.0,0.0,"[0.0, 0.0]",No
3,5,1.0,1.0,0.0,0.0,0.0,"[0.0, 0.0]",No
4,8,1.0,1.0,0.0,0.0,0.0,"[0.0, 0.0]",No


In [26]:
# add this to results to showcase the method:
# test-table rows of the individuals whose CF value is 1
df_wcf_sf_disc[
    df_wcf_sf_disc['individual'].isin(m_res_df[m_res_df['CF'] == 1].index)
].head(5)

Unnamed: 0,individual,p1,p2,org_diff,d_alpha,diff,CIs,cfST
906,2026,1.0,0.8125,0.188,0.191253,0.0,"[-0.004, 0.379]",Yes
1118,2554,0.9375,0.9375,0.0,0.16774,0.0,"[-0.168, 0.168]",No
1119,2559,1.0,0.8125,0.188,0.191253,0.0,"[-0.004, 0.379]",Yes
1214,2793,1.0,0.9375,0.062,0.11861,0.0,"[-0.056, 0.181]",Yes
1266,2931,1.0,0.8125,0.188,0.191253,0.0,"[-0.004, 0.379]",Yes


### $|\mathbf{A}=1|$: Race

We focus on the Level 3 SCFs.

In [27]:
# counterfactual data under the intervention do:=White
do = 'White'
# build the path with os.path.join rather than hard-coded '\\' separators
org_cf_df = pd.read_csv(
    os.path.join(data_path, 'counterfactuals', f'cf_LawSchool_lev3_do{do}.csv'),
    sep='|',
).reset_index(drop=True)
print(org_cf_df.shape)
print(org_cf_df.columns.to_list())
org_cf_df.head(5)

(21790, 8)
['female', 'nonwhite', 'Sex', 'Race', 'resid_LSAT', 'resid_UGPA', 'scf_LSAT', 'scf_UGPA']


Unnamed: 0,female,nonwhite,Sex,Race,resid_LSAT,resid_UGPA,scf_LSAT,scf_UGPA
0,1,0,Female,White,1.823899,-0.232222,39.0,3.1
1,1,0,Female,White,-1.176101,-0.332222,36.0,3.0
2,0,0,Male,White,-7.784813,-0.107057,30.0,3.1
3,0,0,Male,NonWhite,5.853746,-0.788069,43.639,2.419
4,1,0,Female,White,-0.176101,0.067778,37.0,3.4


In [28]:
# keep the protected attributes and the counterfactual (scf_*) features,
# renamed so the columns match the factual frame
cf_df = (
    org_cf_df[['Sex', 'Race', 'scf_LSAT', 'scf_UGPA']]
    .copy()
    .rename(columns={'Sex': 'Gender', 'scf_LSAT': 'LSAT', 'scf_UGPA': 'UGPA'})
)

# apply the same decision maker to the counterfactual features
cf_df['Score'] = cf_df['UGPA'] * b1 + cf_df['LSAT'] * b2
cf_df['Y'] = np.where(cf_df['Score'].ge(min_score), 1, 0)
cf_df.head(5)

Unnamed: 0,Gender,Race,LSAT,UGPA,Score,Y
0,Female,White,39.0,3.1,17.46,0
1,Female,White,36.0,3.0,16.2,0
2,Male,White,30.0,3.1,13.86,0
3,Male,NonWhite,43.639,2.419,18.907,0
4,Female,White,37.0,3.4,16.84,0


In [None]:
# df.groupby(['Y', 'Race']).count()
# cf_df.groupby(['Y', 'Race']).count()

In [29]:
# do:=White results: factual decision Y next to the counterfactual decision cf_Y
w_res_df = df[['Gender', 'Race', 'Y']].copy()
w_res_df['cf_Y'] = cf_df['Y'].copy()
w_res_df.head(5)

Unnamed: 0,Gender,Race,Y,cf_Y
0,Female,White,0,0
1,Female,White,0,0
2,Male,White,0,0
3,Male,NonWhite,0,0
4,Female,White,0,0


In [30]:
# attribute-specific params
# target feature and the values encoding its positive / negative outcome
feat_trgt, feat_trgt_vals = 'Y', dict(positive=1, negative=0)
# list of relevant features
feat_rlvt = ['LSAT', 'UGPA']
# protected feature and its values: use 'non_protected' and 'protected' accordingly
feat_prot, feat_prot_vals = 'Race', dict(non_protected='White', protected='NonWhite')

# st-specific params
# n: size of the neighborhoods; alpha: significance level; tau: deviation
n, alpha, tau = 15, 0.05, 0.0

#### Standard ST (stST)

Note that, by construction, ST ignores any attribute, such as 'Race' (or 'Gender'), that is not specified as a relevant feature.

In [31]:
# standard situation testing: only the factual data is passed to the baseline
test_df = df.copy()

st = SituationTesting()
st.setup_baseline(test_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Race'])

w_res_df['stST'] = st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
)

In [32]:
# number of individuals with stST above tau
len(w_res_df[w_res_df['stST'] > tau])

34

In [33]:
# positive discrimination? number of individuals with stST below tau
len(w_res_df[w_res_df['stST'] < tau])

38

#### Counterfactual Situation Testing (cfST)

In [34]:
# counterfactual situation testing: factual and counterfactual data, centers excluded
test_df = df.copy()
test_cf_df = cf_df.copy()

cf_st = SituationTesting()
cf_st.setup_baseline(test_df, test_cf_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Race'])

w_res_df['cfST'] = cf_st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
    include_centers=False,  # don't include the centers
)

In [35]:
# number of individuals with cfST above tau
len(w_res_df[w_res_df['cfST'] > tau])

243

In [36]:
# positive discrimination? number of individuals with cfST below tau
len(w_res_df[w_res_df['cfST'] < tau])

0

#### Counterfactual Fairness

In [37]:
# counterfactual-fairness result exposed by the cfST object of the run without centers
w_res_df['CF'] = cf_st.res_counterfactual_unfairness

In [38]:
# number of individuals whose CF value is 1
len(w_res_df[w_res_df['CF'] == 1])

231

In [39]:
# number of individuals whose CF value is 2
len(w_res_df[w_res_df['CF'] == 2])

0

#### cfST with centers

In [40]:
# counterfactual situation testing again, this time with the centers included
test_df = df.copy()
test_cf_df = cf_df.copy()

wcf_st = SituationTesting()
wcf_st.setup_baseline(test_df, test_cf_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['Race'])

w_res_df['cfST_w'] = wcf_st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
    include_centers=True,  # include the centers
)

In [41]:
# number of individuals with cfST_w above tau
len(w_res_df[w_res_df['cfST_w'] > tau])

284

In [42]:
# positive discrimination? number of individuals with cfST_w below tau
len(w_res_df[w_res_df['cfST_w'] < tau])

0

### Multiple discrimination: $|\mathbf{A}|=2$

In [43]:
# number of individuals who are both Female and NonWhite
len(df[(df['Race'] == 'NonWhite') & (df['Gender'] == 'Female')])

1833

In [44]:
# for stST: individuals above tau in both the Gender run and the Race run
m_res_df[m_res_df['stST'] > tau].merge(
    w_res_df[w_res_df['stST'] > tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

6

In [45]:
# for stST +: individuals below tau in both the Gender run and the Race run
m_res_df[m_res_df['stST'] < tau].merge(
    w_res_df[w_res_df['stST'] < tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

1

In [46]:
# for cfST: individuals above tau in both the Gender run and the Race run
m_res_df[m_res_df['cfST'] > tau].merge(
    w_res_df[w_res_df['cfST'] > tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

8

In [47]:
# for cfST +: individuals below tau in both the Gender run and the Race run
m_res_df[m_res_df['cfST'] < tau].merge(
    w_res_df[w_res_df['cfST'] < tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

0

In [48]:
# for cfST_w: individuals above tau in both the Gender run and the Race run
m_res_df[m_res_df['cfST_w'] > tau].merge(
    w_res_df[w_res_df['cfST_w'] > tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

9

In [49]:
# for cfST_w +: individuals below tau in both the Gender run and the Race run
m_res_df[m_res_df['cfST_w'] < tau].merge(
    w_res_df[w_res_df['cfST_w'] < tau],
    how='inner', left_index=True, right_index=True,
).shape[0]

0

In [50]:
# for Counterfactual Fairness: individuals with CF == 1 in both the Gender and the Race run
m_res_df[m_res_df['CF'] == 1].merge(
    w_res_df[w_res_df['CF'] == 1],
    how='inner', left_index=True, right_index=True,
).shape[0]

5

In [51]:
# for Counterfactual Fairness +: individuals with CF == 2 in both the Gender and the Race run
m_res_df[m_res_df['CF'] == 2].merge(
    w_res_df[w_res_df['CF'] == 2],
    how='inner', left_index=True, right_index=True,
).shape[0]

0

### Intersectional Discrimination: $A_1 \cap A_2$

In [52]:
# counterfactual data under the joint intervention do:=MaleWhite
do = 'MaleWhite'
# build the path with os.path.join rather than hard-coded '\\' separators
org_cf_df = pd.read_csv(
    os.path.join(data_path, 'counterfactuals', f'cf_LawSchool_lev3_do{do}.csv'),
    sep='|',
).reset_index(drop=True)
print(org_cf_df.shape)
print(org_cf_df.columns.to_list())
org_cf_df.head(5)

(21790, 6)
['female_nonwhite', 'GenderRace', 'resid_LSAT', 'resid_UGPA', 'scf_LSAT', 'scf_UGPA']


Unnamed: 0,female_nonwhite,GenderRace,resid_LSAT,resid_UGPA,scf_LSAT,scf_UGPA
0,0,Female-White,1.810693,-0.138949,39.0,3.1
1,0,Female-White,-1.189307,-0.238949,36.0,3.0
2,0,Male-White,-7.189307,-0.138949,30.0,3.1
3,0,Male-NonWhite,1.810693,-1.038949,39.0,2.2
4,0,Female-White,-0.189307,0.161051,37.0,3.4


In [53]:
# keep the intersectional attribute and the counterfactual (scf_*) features,
# renamed so the feature columns match the factual frame
cf_df = (
    org_cf_df[['GenderRace', 'scf_LSAT', 'scf_UGPA']]
    .copy()
    .rename(columns={'scf_LSAT': 'LSAT', 'scf_UGPA': 'UGPA'})
)

# apply the same decision maker to the counterfactual features
cf_df['Score'] = cf_df['UGPA'] * b1 + cf_df['LSAT'] * b2
cf_df['Y'] = np.where(cf_df['Score'].ge(min_score), 1, 0)
cf_df.head(5)

Unnamed: 0,GenderRace,LSAT,UGPA,Score,Y
0,Female-White,39.0,3.1,17.46,0
1,Female-White,36.0,3.0,16.2,0
2,Male-White,30.0,3.1,13.86,0
3,Male-NonWhite,39.0,2.2,16.92,0
4,Female-White,37.0,3.4,16.84,0


In [54]:
# add the intersectional var to df: '<Gender>-<Race>', e.g. 'Female-White'
# (the stray `df.head()` before the assignment was a no-op and is dropped)
df['GenderRace'] = df['Gender'] + '-' + df['Race']
df.head(5)

Unnamed: 0,Gender,Race,LSAT,UGPA,Score,Y,GenderRace
0,Female,White,39.0,3.1,17.46,0,Female-White
1,Female,White,36.0,3.0,16.2,0,Female-White
2,Male,White,30.0,3.1,13.86,0,Male-White
3,Male,NonWhite,39.0,2.2,16.92,0,Male-NonWhite
4,Female,White,37.0,3.4,16.84,0,Female-White


In [55]:
# df.groupby(['Y', 'GenderRace']).count()
# cf_df.groupby(['Y', 'GenderRace']).count()

In [56]:
# do:=MaleWhite results: factual decision Y next to the counterfactual decision cf_Y
int_res_df = df[['Gender', 'Race', 'Y']].copy()
int_res_df['cf_Y'] = cf_df['Y'].copy()
int_res_df.head(5)

Unnamed: 0,Gender,Race,Y,cf_Y
0,Female,White,0,0
1,Female,White,0,0
2,Male,White,0,0
3,Male,NonWhite,0,0
4,Female,White,0,0


In [57]:
# attribute-specific params
feat_trgt = 'Y'
feat_trgt_vals = {'positive': 1, 'negative': 0}
# list of relevant features
feat_rlvt = ['LSAT', 'UGPA']
# protected feature
feat_prot = 'GenderRace'
# values for the protected feature: use 'non_protected' and 'protected' accordingly;
# every group other than 'Female-NonWhite' is non-protected, each listed once
# ('Male-NonWhite' used to appear twice)
feat_prot_vals = {'non_protected': ['Female-White', 'Male-NonWhite', 'Male-White'],
                  'protected': 'Female-NonWhite'
                 }

# st-specific params
# size of neighborhoods
n = 15
# significance level
alpha = 0.05
# tau deviation
tau = 0.0

#### Standard ST (stST)

In [58]:
# standard situation testing: only the factual data is passed to the baseline
test_df = df.copy()

st = SituationTesting()
st.setup_baseline(test_df, continuous_atts=['LSAT', 'UGPA'], nominal_atts=['GenderRace'])

int_res_df['stST'] = st.run(
    sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
    target_att=feat_trgt, target_val=feat_trgt_vals,
    k=n, alpha=alpha, tau=tau,
)

In [59]:
# number of individuals with stST above tau
len(int_res_df[int_res_df['stST'] > tau])

14

In [60]:
# positive discrimination? number of individuals with stST below tau
len(int_res_df[int_res_df['stST'] < tau])

25

#### Counterfactual Situation Testing (cfST)

In [61]:
# counterfactual situation testing on the intersectional attribute, centers excluded
test_df    = df.copy()
test_cf_df = cf_df.copy()

# don't include the centers
cf_st = SituationTesting()
# the nominal attribute is the protected feature of this section ('GenderRace'), as in the
# stST setup; the previous ['Race'] was a copy-paste slip (test_cf_df has no 'Race' column)
cf_st.setup_baseline(test_df, test_cf_df, nominal_atts=['GenderRace'], continuous_atts=['LSAT', 'UGPA'])

int_res_df['cfST'] = cf_st.run(target_att=feat_trgt, target_val=feat_trgt_vals,
                               sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
                               include_centers=False,
                               k=n, alpha=alpha, tau=tau)

In [62]:
# number of individuals with cfST above tau
len(int_res_df[int_res_df['cfST'] > tau])

130

In [63]:
# positive discrimination? number of individuals with cfST below tau
len(int_res_df[int_res_df['cfST'] < tau])

0

#### Counterfactual Fairness

In [64]:
# counterfactual-fairness result exposed by the cfST object of the run without centers
int_res_df['CF'] = cf_st.res_counterfactual_unfairness

In [65]:
# number of individuals whose CF value is 1
len(int_res_df[int_res_df['CF'] == 1])

113

In [66]:
# number of individuals whose CF value is 2
len(int_res_df[int_res_df['CF'] == 2])

0

#### cfST with centers

In [67]:
# counterfactual situation testing on the intersectional attribute, centers included
test_df    = df.copy()
test_cf_df = cf_df.copy()

# include the centers
wcf_st = SituationTesting()
# the nominal attribute is the protected feature of this section ('GenderRace'), as in the
# stST setup; the previous ['Race'] was a copy-paste slip (test_cf_df has no 'Race' column)
wcf_st.setup_baseline(test_df, test_cf_df, nominal_atts=['GenderRace'], continuous_atts=['LSAT', 'UGPA'])

int_res_df['cfST_w'] = wcf_st.run(target_att=feat_trgt, target_val=feat_trgt_vals,
                                  sensitive_att=feat_prot, sensitive_val=feat_prot_vals,
                                  include_centers=True,
                                  k=n, alpha=alpha, tau=tau)

In [68]:
# number of individuals with cfST_w above tau
len(int_res_df[int_res_df['cfST_w'] > tau])

130

In [69]:
# positive discrimination? number of individuals with cfST_w below tau
len(int_res_df[int_res_df['cfST_w'] < tau])

0