# Calculating Influential Fairness Scores

In this notebook, we demonstrate how to compute influence scores for the COMPAS dataset. We generate both our original Influential Fairness (IF) scores and the black-box scores (inspired by Wang et al., 2017).

In [1]:
# Import packages
import sys
sys.path.insert(0, '../..')
sys.path.insert(0, '../../IF/AIX360')

import pandas as pd
import numpy as np
from IF import *
import torch
from torch import nn

import pickle
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dataset-specific settings.
# data_name is interpolated into every input/output file name in this notebook.
data_name = 'compas'

# Columns holding the outcome and the group attribute
outcome_column_name, group_column_name = 'decile_score', 'Race'

# Labels used for the two groups being compared
default_majority_group, default_minority_group = 'White', 'Non-White'

In [3]:
# Notebook-wide settings

# Reproducibility: a single seed, shared through one NumPy RandomState instance
random_seed = 5968
random_state = np.random.RandomState(random_seed)

# Workflow switches
#    - splitData:   True -> (re)split the raw data, False -> read the saved subsets
#    - createModel: True -> (re)create the model
# The training cell retrains when either switch is True.
splitData, createModel = True, True

# Fraction of the data assigned to each of the three subsets:
#    - train  : used to train the model
#    - eval   : used to evaluate the model
#    - public : held-out subset
train_size, eval_size, public_size = 0.30, 0.30, 0.40

In [4]:
# Produce df_train / df_eval / df_public, either by splitting the proxy
# dataset afresh or by re-reading the subsets saved by an earlier run.
data_file_name = '../../data/modified/%s_proxy_data.csv' % (data_name)

if not splitData:
    # Reuse the saved subsets; the first CSV column holds the row index
    df_train = pd.read_csv("out/%s_train.csv"%(data_name), index_col = 0)
    df_eval = pd.read_csv("out/%s_eval.csv"%(data_name), index_col = 0)
    df_public = pd.read_csv("out/%s_public.csv"%(data_name), index_col = 0)

else:
    # Load the proxy dataset and discard its first column
    raw_df = pd.read_csv(data_file_name).iloc[:, 1:]

    # Replace the numeric group code with readable labels:
    # 0 -> minority label, any other value -> majority label
    raw_df[group_column_name] = np.where(raw_df[group_column_name] == 0,
                                         default_minority_group,
                                         default_majority_group)

    # The helper's "audit" share is our eval subset and its "test" share is
    # our public subset (per the original note it returns train, audit, test)
    df_train, df_eval, df_public = BB_functions.train_test_audit_split(
        df = raw_df,
        train_size = train_size,
        test_size = public_size,
        audit_size = eval_size,
        group_column_name = group_column_name,
        outcome_column_name = outcome_column_name,
        random_state = random_state)

    # Persist the three subsets so later runs can skip the split
    df_train.to_csv("out/%s_train.csv"%(data_name))
    df_eval.to_csv("out/%s_eval.csv"%(data_name))
    df_public.to_csv("out/%s_public.csv"%(data_name))

In [5]:
# Convert the train and eval frames into the X / Y / S arrays returned by
# df_to_XYS (presumably features, outcome and group indicator -- confirm in
# BB_functions), for the PyTorch steps that follow
X_train, Y_train, S_train = BB_functions.df_to_XYS(
    df_train,
    outcome_column_name = outcome_column_name,
    group_column_name = group_column_name,
    minority_group = default_minority_group)

X_eval, Y_eval, S_eval = BB_functions.df_to_XYS(
    df_eval,
    outcome_column_name = outcome_column_name,
    group_column_name = group_column_name,
    minority_group = default_minority_group)

# Batch loaders over (X, Y). The dataset objects are built inline, so no
# temporary dataset names are left behind in the notebook namespace.
# Only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(
    pytorch_functions.CurrentDataset(X_train, Y_train),
    batch_size=100, shuffle=True, num_workers=0)
evalloader = torch.utils.data.DataLoader(
    pytorch_functions.CurrentDataset(X_eval, Y_eval),
    batch_size=100, shuffle=False, num_workers=0)

# Whole-subset tensors.
# X and S are float32 tensors that track gradients; Y is built as float64 and
# then cast to int64 with .long()
x_train_tensor = torch.tensor(X_train.astype(np.float32),requires_grad=True)
s_train_tensor = torch.tensor(S_train.astype(np.float32),requires_grad=True)
y_train_tensor = torch.tensor(Y_train.astype(np.float64),requires_grad=True).long()

x_eval_tensor = torch.tensor(X_eval.astype(np.float32),requires_grad=True)
s_eval_tensor = torch.tensor(S_eval.astype(np.float32),requires_grad=True)
y_eval_tensor = torch.tensor(Y_eval.astype(np.float64),requires_grad=True).long()

In [6]:
# Train a fresh model (and record its disparities) or load the saved one.

# Number of input features, read from the first training example
input_dim = trainloader.dataset.__getitem__(0)[0].shape[0]

# Single source of truth for where the model lives, so the save and load
# branches cannot drift apart
model_path = 'models/%s_model.pth'%(data_name)

if splitData or createModel:
    # Seed torch as well as NumPy: weight initialisation and the shuffled
    # training loader both draw from torch's global RNG, so without this the
    # trained model differs from run to run
    torch.manual_seed(random_seed)

    model = pytorch_functions.LogisticRegression(input_dim)
    model.cpu()
    pytorch_functions.train(trainloader, model)
    pytorch_functions.save_model(model, model_path)

    # Also calculate disparities on the train and eval subsets
    disp_scores = support_functions.calc_disp(model, x_train_tensor, y_train_tensor, s_train_tensor,
                            [default_minority_group,default_majority_group])
    disp_scores.to_csv("out/%s_train_disparities.csv"%(data_name))
    disp_scores = support_functions.calc_disp(model, x_eval_tensor, y_eval_tensor, s_eval_tensor,
                            [default_minority_group,default_majority_group])
    disp_scores.to_csv("out/%s_eval_disparities.csv"%(data_name))
else:
    # Load from the same path the training branch saves to, through the
    # module-qualified helper used everywhere else in this notebook
    model = pytorch_functions.load_model(input_dim, model_path)

# Get results on test set
#test(evalloader, model)

Next, we calculate our original glass-box (white-box) influence scores.

In [7]:
# Glass-box influence of every training point on each fairness metric.
# For training point i the score computed below is
#     influence_i = - grad_fair^T . H^{-1} . grad_loss_i
# where grad_fair is the gradient of the fairness metric on the eval set,
# H is the Hessian built from the training-loss gradient, and grad_loss_i is
# the loss gradient of training point i (all w.r.t. the model parameters).

# Gradient of the training loss w.r.t. the model parameters
loss_fxn = nn.BCELoss()
grad_loss_train = WB_functions.grad_group(x_train_tensor, y_train_tensor, model, loss_fxn)

# Hessian derived from that gradient; converted to NumPy once because it is
# the same matrix for every training point and every metric
hess = WB_functions.hessian(grad_loss_train, model)
hess_np = hess.detach().numpy()

metrics = ['SP','FNR', 'FPR', 'FDR']
for metric in metrics:
    # Gradient of the fairness metric on the eval set, stacked into a single
    # column vector once per metric (it does not depend on the training point)
    grad_fair_test = WB_functions.grad_disp(model, metric, x_eval_tensor, y_eval_tensor, s_eval_tensor)
    grad_fair_test = torch.vstack([g.view(-1,1) for g in grad_fair_test])

    # One influence value per training point, collected in a list
    # (np.append would copy the whole array on every iteration)
    influence_values = []
    for i in range(df_train.shape[0]):
        grad_loss_ind = WB_functions.grad_individual(x_train_tensor[i], y_train_tensor[i], model, loss_fxn)
        grad_loss_ind = torch.vstack([g.view(-1,1) for g in grad_loss_ind])
        # H^{-1} . grad_loss_i, obtained by solving the linear system
        term2 = np.linalg.solve(hess_np, grad_loss_ind.detach().numpy())
        influence = - torch.matmul(grad_fair_test.T, torch.tensor(term2))
        influence_values.append(influence.item())
    influences = np.asarray(influence_values, dtype=np.float64)

    out = df_train.copy()
    out['influences'] = influences
    # Attach the predictions BEFORE sorting. They come back as a plain array
    # in df_train's row order and are assigned by position, so adding them
    # after the sort would pair each row with another row's prediction.
    out['prediction'] = model(x_train_tensor,1).detach().numpy()

    # Order rows by absolute influence (ascending)
    out = out.sort_values(by='influences', key=abs)

    # Save influences to a file, without and with the model predictions
    out.drop(['prediction'], axis=1).to_csv("out/%s_%s_train_influences.csv"%(data_name, metric))
    out.to_csv("out/%s_%s_train_influences_withYhat.csv"%(data_name, metric))


Now, we will calculate the black-box scores

In [8]:
# Convert the held-out (public) frame into the X / Y / S arrays returned by df_to_XYS.
# Only X_public is used below; Y_public and S_public are not read in this cell.
X_public, Y_public, S_public = BB_functions.df_to_XYS(df_public,
                                outcome_column_name = outcome_column_name,
                                group_column_name = group_column_name,
                                minority_group = default_minority_group)

# Create tensor for the held-out subset
x_public_tensor = torch.tensor(X_public.astype(np.float32),requires_grad=True)

# Recalculate eval disparities, so they are available even when the training
# cell took its load-from-disk branch (which does not compute them)
disp_scores = support_functions.calc_disp(model, x_eval_tensor, y_eval_tensor, s_eval_tensor, 
                        [default_minority_group,default_majority_group])

# Outputs the black-box IF scores for the held-out subset (public), using performance of the evaluation subset.
# The public frame is passed both as the first argument and as the frame to score.
# Files are written based on the name given ('public')
BB_functions.calcBlackBoxFn(df_public, df_public, model, disp_scores, default_minority_group, default_majority_group, 'public', 
            data_name, outcome_column_name, group_column_name, x_apply_tensor = x_public_tensor)

# Disabled variant that scores the training subset instead.
# NOTE(review): it passes an extra positional random_seed that the live call above does not -- check against the current signature before re-enabling
#calcBlackBoxFn(df_public, df_train, model, disp_scores, default_minority_group, default_majority_group, 'train', 
#         data_name, outcome_column_name, group_column_name, random_seed, x_apply_tensor = x_train_tensor)

Finally, we determine which model types best learn and reproduce the white-box (glass-box) influence scores.

In [None]:
# Fit one influence-score predictor per (model type, label knowledge, metric)
# combination and compare its test-split predictions with the black-box metric.
metrics = ['SP','FNR', 'FPR', 'FDR']
knowLabels = ['','withY']     # '' -> outcome column hidden from the predictor, 'withY' -> outcome column kept
model_types = ['Kneighbors', 'GradientBoost', 'Linear']

# ---- Inputs that are identical for every combination: load them once ----
df_public = pd.read_csv("out/%s_public.csv"%(data_name), index_col = 0)
X_public, Y_public, _ = BB_functions.df_to_XYS(df_public,
                    outcome_column_name = outcome_column_name,
                    group_column_name = group_column_name,
                    minority_group = default_minority_group)
# Input width read from the first public example
public_ds = pytorch_functions.CurrentDataset(X_public, Y_public)
input_dim = public_ds[0][0].shape[0]
model = pytorch_functions.load_model(input_dim, 'models/%s_model.pth'%(data_name))
disp_scores = pd.read_csv("out/%s_eval_disparities.csv"%(data_name), sep = ',', index_col=0)

for model_type in model_types:
    for knowLabel in knowLabels:
        for metric in metrics:
            # Where the fitted predictor is stored
            filename = 'models/%s_%s_%s_%s.sav'%(model_type, data_name, metric, knowLabel)

            # Skip if model exists (disabled; re-enabling needs `import os`)
            #if (os.path.exists(filename)):
            #    continue

            # Glass-box influences (with model predictions) for this metric
            influence_df = pd.read_csv("out/%s_%s_train_influences_withYhat.csv"%(data_name, metric),index_col = 0)
            # Encode the group column numerically: minority -> 0, anything else -> 1
            influence_df[group_column_name] = [0 if x == default_minority_group else 1
                                               for x in influence_df[group_column_name]]

            # Split data 60/40 for train/test.
            # The split gets its own names so the notebook-wide df_train /
            # X_train from the earlier cells are not overwritten.
            # random_state is the shared RandomState instance, so each call
            # draws a different split.
            ifs_train_df, ifs_test_df = train_test_split(influence_df, test_size = 0.4, random_state=random_state)
            # Independent copy: result columns are added to it further down
            ifs_test_df = ifs_test_df.copy()

            # The influence score is the regression target
            target_all = influence_df['influences']
            target_train = ifs_train_df['influences']

            # Features: always drop target and model prediction; additionally
            # drop the outcome column when the labels are not known
            with_labels = (knowLabel == 'withY')
            if with_labels:
                dropped_columns = ['prediction','influences']
            else:
                dropped_columns = ['influences', outcome_column_name, 'prediction']
            features_all = influence_df.drop(dropped_columns, axis = 1)
            features_train = ifs_train_df.drop(dropped_columns, axis = 1)
            features_test = ifs_test_df.drop(dropped_columns, axis = 1)

            # Do hyperparameter tuning to get the best model
            ifs_predictor = support_functions.prepPredictor(model_type, features_all, target_all,
                                                            features_train, target_train, random_state)

            # Apply influence predictor
            pred = ifs_predictor.predict(features_test)
            if with_labels:
                # Second prediction with the outcome column thresholded at 0.5
                features_test_changeY = features_test.copy()
                features_test_changeY[outcome_column_name] = [1 if x > 0.5 else 0
                                                              for x in features_test_changeY[outcome_column_name]]
                pred_changeY = ifs_predictor.predict(features_test_changeY)

            # Save the model; the context manager closes the file handle
            with open(filename, 'wb') as predictor_file:
                pickle.dump(ifs_predictor, predictor_file)

            # For every model built, we also want to calculate the scores from BB FIF.
            # Copies are passed so each iteration hands the helper pristine
            # frames, as the previous per-iteration reload did.
            # NOTE(review): the saved output of this cell ends with
            #   TypeError: calcBlackBoxMetric() got multiple values for argument 'x_apply_tensor'
            # i.e. one of the positional arguments below already fills the
            # x_apply_tensor slot. The argument list must be checked against
            # the current signature in BB_functions (not visible here); it is
            # deliberately left unchanged rather than guessed at.
            # NOTE(review): the frame passed as second argument carries the 0/1
            # group coding applied above, while df_public carries the string
            # labels -- confirm that this is what calcBlackBoxMetric expects.
            val = BB_functions.calcBlackBoxMetric(df_public.copy(), ifs_test_df.drop(['prediction','influences'], axis=1), model, disp_scores.copy(),
                                        default_minority_group, default_majority_group, 'train-test', data_name,
                                        outcome_column_name, group_column_name, metric, x_apply_tensor = None)

            # Collect glass-box (GB) and black-box (BB) predictions next to the true influences
            ifs_test_df['GB_prediction'] = pred
            if with_labels:
                ifs_test_df['GB_prediction_changeY'] = pred_changeY
            ifs_test_df['BB_prediction'] = val
            ifs_test_df.to_csv("out/%s_%s_%s_%s_train-test_influences_withYhat.csv"%(metric, model_type, knowLabel, data_name))

Traceback (most recent call last):
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/neighbors/_regression.py", line 236, in predict
    neigh_ind = self.kneighbors(X, return_distance=False)
  File "/Users/brikeyrich/anaconda3/envs/IF/lib/python3.10/site-packages/sklearn/neighbors/_b

TypeError: calcBlackBoxMetric() got multiple values for argument 'x_apply_tensor'