In [None]:
import numpy as np
import pandas as pd
import ast

## CONFIG: path to the code directory inside the project folder.
## It is put on sys.path below so that the `utils` package imports can be resolved.
code_path = ""
import sys  
sys.path.insert(1, code_path)

## CONFIG: path to the project directory.
## The cells below use it as a plain string prefix (folder + "data\\..."),
## so a non-empty value has to end with a path separator.
folder= ""

## project-local helpers (they rely on the sys.path.insert above having run first)
from utils.data_loading import get_feedback_df, get_training
from utils.utils import attributes_names_mapping
from utils.training import oneoff_training_evaluation, iml_training_evaluation

## silence pandas' SettingWithCopyWarning for the rest of the notebook
import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

## every key of attributes_names_mapping is passed as a sensitive attribute to the training/evaluation calls
sensitive_attrs = list(attributes_names_mapping.keys())
## value passed as `fs` to every training/evaluation call in this notebook
fs = 0.5

# Data Loading

In [None]:
## LOAD TRAINING DATA
## get_training is a project helper from utils.data_loading. Its result is later indexed as
## training_dict[participant_id][str(app_id)]['predicted_decision'][str(app_id)].
## NOTE(review): "_training.csv" looks like the file-name suffix of the files to collect
## from the given directory — confirm in get_training.
training_dict = get_training(folder+"data\\collected_data","_training.csv") 

In [None]:
## LOAD TRAINING, VALIDATION AND TEST SETS
def _without_columns(frame, unwanted):
    """Return `frame` restricted to the columns whose name is not listed in `unwanted`."""
    keep_mask = ~frame.columns.isin(unwanted)
    return frame.loc[:, keep_mask]


def _descriptive_name(column_name):
    """Look up the descriptive name of a training column, ignoring a trailing '_LE'."""
    if column_name.endswith('_LE'):
        return attributes_names_mapping[column_name[:-3]]
    return attributes_names_mapping[column_name]


## each CSV carries a leftover 'Unnamed: 0' column that is dropped right after reading
train_df_train = _without_columns(
    pd.read_csv(folder+'data\\processed_data\\train.csv', delimiter=','),
    ['Unnamed: 0'],
)
train_df_test = _without_columns(
    pd.read_csv(folder+'data\\processed_data\\validation.csv', delimiter=','),
    ['Unnamed: 0'],
)
## test data used to predict labels before shown to participants (TARGET is removed as well)
test_df = _without_columns(
    pd.read_csv(folder+'data\\processed_data\\test.csv', delimiter=','),
    ['TARGET', 'Unnamed: 0'],
)

## features / label split; the label is kept as a one-column frame
non_feature_columns = ['TARGET', 'SK_ID_CURR']
X_train_original = _without_columns(train_df_train, non_feature_columns)
y_train_original = train_df_train.loc[:, train_df_train.columns == 'TARGET']
## the validation split is the dataset used for accuracy
X_test_original = _without_columns(train_df_test, non_feature_columns)
y_test_original = train_df_test.loc[:, train_df_test.columns == 'TARGET']

## attributes used in training, in column order, plus their descriptive names
training_attr_in_order = X_train_original.columns.tolist()
training_attr_descriptive_in_order = [_descriptive_name(name) for name in training_attr_in_order]

## load original test set to be used for fairness metrics
train_df_test_bin = _without_columns(
    pd.read_csv(folder+'data\\processed_data\\validation_bin.csv', delimiter=','),
    ['Unnamed: 0'],
)

## LOAD FEEDBACK
prolific_export_filePath = folder+"data\\collected_data\\prolific_export_demographics.xlsx"
interaction_logs_filePath = folder+"data\\collected_data\\LOGS\\interactions\\"
feedback_df = get_feedback_df(prolific_export_filePath, interaction_logs_filePath)

## DISCARD FEEDBACK THAT WAS REVERTED
OK_EVENT = "OKBUTTON_CLICKED_DECIDE_MODAL_Applications_List"
REVERT_EVENT = "REVERTBUTTON_CLICKED"


def _reverted_event_indices(events, revert_event):
    """Return the index labels of the events that a revert click undoes.

    `events` holds only OK / revert rows, in log order, with the columns
    'ID' (participant) and 'Function' (event name). For every revert row the
    event logged immediately before it *by the same participant* is reported.

    A revert with no earlier event of that participant reports nothing. The
    previous positional lookup (`position - 1`) turned into `-1` in that case
    and silently discarded the last event of the whole log, and it could also
    pick an event that belonged to a different participant.
    """
    reverted = []
    last_event_by_participant = {}
    for event_idx, participant_id, event_name in zip(events.index, events["ID"], events["Function"]):
        if event_name == revert_event and participant_id in last_event_by_participant:
            reverted.append(last_event_by_participant[participant_id])
        last_event_by_participant[participant_id] = event_idx
    return reverted


ok_rev_df = feedback_df[feedback_df["Function"].isin([REVERT_EVENT, OK_EVENT])]
## may also contain indices of revert rows (two reverts in a row); those are
## harmless because only OK rows survive the filter below
ok_rev_del_idx = _reverted_event_indices(ok_rev_df, REVERT_EVENT)

feedback_df = feedback_df[feedback_df['Function']==OK_EVENT]
print(len(feedback_df))
feedback_df = feedback_df[~feedback_df.index.isin(ok_rev_del_idx)]
print(len(feedback_df))
## drop() returns a new frame, so later column assignments do not write into a filtered slice
feedback_df = feedback_df.drop(columns=["Lower bound", "Upper bound"])

## GET NEW WEIGHTS AND NORMALIZE
## For every feedback row:
##   * parse the logged 'Value' payload (a Python-literal dict with 'changed_weights' and
##     'initial_weights'; the latter is itself a string literal that is parsed a second time),
##   * look up the decision the model predicted for this participant / application,
##   * build the full weight vector in training-column order (changed weight if the participant
##     edited it, initial weight otherwise) and normalize it to sum to 1.
## Rows without any changed weight get an empty list as their weight vector.
all_new_values = []
pred_decision = []
for p_id, app_id, raw_value in zip(feedback_df['ID'], feedback_df['App ID'], feedback_df['Value']):
    value = ast.literal_eval(raw_value)
    ch_values = {pair['attribute']:float(pair['value']) for pair in value['changed_weights']}
    init_values = {pair['attribute']:float(pair['value']) for pair in ast.literal_eval(value['initial_weights'])}

    ## PREDICTED DECISION (None when no training record exists for this participant / application)
    app_key = str(app_id)
    if p_id in training_dict and app_key in training_dict[p_id]:
        pred_decision.append('Accepted' if training_dict[p_id][app_key]['predicted_decision'][app_key] else 'Rejected')
    else:
        pred_decision.append(None)

    if not ch_values:
        all_new_values.append([])
        continue

    new_value = [
        ch_values[attr] if attr in ch_values else init_values[attr]
        for attr in training_attr_descriptive_in_order
    ]
    ## NORMALIZE
    normalizing_factor = sum(new_value)
    if normalizing_factor == 0:
        ## previously a bare ZeroDivisionError with no hint about the offending row
        raise ValueError(
            f"Weights of participant {p_id!r} for application {app_id!r} sum to zero and cannot be normalized"
        )
    all_new_values.append([v/normalizing_factor for v in new_value])

## assign() builds a new frame instead of writing columns into a filtered slice
feedback_df = feedback_df.assign(Value=all_new_values, PredictedDecision=pred_decision)

### Global Model - Labels ('checked'+'unfair')
The XGBoost classifier is retrained. The feedback instances of all participants are added to the training set all at once.

Both the **'checked' and the 'unfair'** instances labelled by participants are used.

We set fs = 0.5.

In [None]:
## inspection only: distinct values of NAME_FAMILY_STATUS_LE in the validation features
## (the '_LE' suffix presumably marks a label-encoded column — confirm against the preprocessing code)
X_test_original['NAME_FAMILY_STATUS_LE'].unique()

In [None]:
# feedback_instance_i =X_test_original.loc[0].to_frame().T
# feedback_instance_i['CODE_GENDER_LE'].tolist()[0]

In [None]:
## Global model, labels only ('checked' + 'unfair').
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
oneoff_training_evaluation(X_train_original, y_train_original,
                            X_test_original, y_test_original,
                           train_df_test_bin, test_df,
                            sensitive_attrs, fs, feedback_df,
                           False, False,
                           folder+'data\\results\\global\\Labels\\','global_Labels')

### Global Model - Labels ('unfair' only)
The XGBoost classifier is retrained. The feedback instances of all participants are added to the training set all at once.

**Only the 'unfair'** labelled instances by participants are used. 

We set fs = 0.5.

In [None]:
## Global model, labels only, restricted to the 'unfair' instances.
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
oneoff_training_evaluation(X_train_original, y_train_original,
                            X_test_original, y_test_original,
                           train_df_test_bin, test_df,
                            sensitive_attrs, fs, feedback_df,
                           True, False,
                           folder+'data\\results\\global\\Labels_Unfair\\','global_Labels_Unfair')

### Global Model - Labels+Weights ('checked'+'unfair')
The XGBoost classifier is retrained. The feedback instances of all participants are added to the training set all at once.

**All the 'checked' and 'unfair'** labelled instances **with proposed weight changes** are used.

We set fs = 0.5.

In [None]:
## Global model, labels + weights ('checked' + 'unfair').
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
oneoff_training_evaluation(X_train_original, y_train_original,
                            X_test_original, y_test_original,
                           train_df_test_bin, test_df,
                            sensitive_attrs, fs, feedback_df,
                           False, True, folder+'data\\results\\global\\Labels+Weights\\','global_Labels+Weights')

### Personalized - Labels ('checked'+'unfair')
58 XGBoost classifiers will be trained, one for each participant independently. The feedback instances of each participant will be added to the training set incrementally, one at a time, in order of increasing timestamp (the previous feedback instances of a participant remain in the training set as we add a new one). The XGBoost classifier is retrained from scratch in every incremental step (iteration).

**All the 'checked' and 'unfair'** labelled instances by participants are used. 

We set fs = 0.5.

In [None]:
## Personalized models, labels only ('checked' + 'unfair').
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
iml_training_evaluation(X_train_original, y_train_original,
                        X_test_original, y_test_original,
                        train_df_test_bin, test_df,
                        sensitive_attrs, fs, feedback_df,
                        False, False, 
                        folder+'data\\results\\personalized\\Labels\\','personalized-labels')

### Personalized - Labels ('unfair')
58 XGBoost classifiers will be trained, one for each participant independently. The feedback instances of each participant will be added to the training set incrementally, one at a time, in order of increasing timestamp (the previous feedback instances of a participant remain in the training set as we add a new one). The XGBoost classifier is retrained from scratch in every incremental step (iteration).

**Only the 'unfair'** labelled instances by participants are used.

We set fs = 0.5.

In [None]:
## Personalized models, labels only, restricted to the 'unfair' instances.
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
iml_training_evaluation(X_train_original, y_train_original,
                        X_test_original, y_test_original,
                        train_df_test_bin, test_df,
                        sensitive_attrs, fs, feedback_df,
                        True, False, 
                        folder+'data\\results\\personalized\\Labels_Unfair\\', 'personalized-labels_unfair')

### Personalized - Labels+Weights ('checked'+'unfair')
58 XGBoost classifiers will be trained, one for each participant independently. The feedback instances of each participant will be added to the training set incrementally, one at a time, in order of increasing timestamp (the previous feedback instances of a participant remain in the training set as we add a new one). The XGBoost classifier is retrained from scratch in every incremental step (iteration).

**All the 'checked' and 'unfair'** labelled instances **with proposed weight changes** are used. The participants' weights for each instance are used to set the feature weights of the training.

We set fs = 0.5.

In [None]:
## Personalized models, labels + weights ('checked' + 'unfair').
## NOTE(review): the two positional booleans are presumably (unfair-only, use-weights) — inferred from
## how they vary between the sections of this notebook; confirm against utils.training.
## The last two arguments look like the results folder and a name/tag for this run — TODO confirm.
iml_training_evaluation(X_train_original, y_train_original,
                        X_test_original, y_test_original,
                        train_df_test_bin, test_df,
                        sensitive_attrs, fs, feedback_df,
                        False, True, 
                        folder+'data\\results\\personalized\\Labels+Weights\\','personalized-Labels+Weights')