<a href="https://colab.research.google.com/github/jhzhang07/CAS783/blob/master/german_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the later cells read the project
# files from /content/drive/MyDrive/CAS783/.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import warnings
import sys

# Root of the project folder on the mounted Google Drive. Defined once and
# reused below so the import path and the constant cannot drift apart.
PROJECT_PATH = "/content/drive/MyDrive/CAS783/"

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Make the project's own modules importable by the following cells.
sys.path.append(PROJECT_PATH)

In [None]:
# Shell magic: upgrade pip itself before installing the project requirements.
!pip3 install --upgrade pip

In [None]:
# Shell magic: install the packages listed in the project's requirements file
# stored on the mounted Drive.
!pip3 install -r "/content/drive/MyDrive/CAS783/requirements.txt"

In [None]:

from adversarial_models import *
from utils import *
from get_data import *

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np
import pandas as pd

import lime
import lime.lime_tabular
import shap
from copy import deepcopy

In [None]:
# Load the experiment configuration and the preprocessed German credit data.
# `cols` is returned by the loader but is not used in the cells shown here.
params = Params("/content/drive/MyDrive/CAS783/model_configurations/experiment_params.json")
X, y, cols = get_and_preprocess_german(params)

# Feature names in column order; every index below refers to this ordering.
features = list(X)

# Column positions of the two features the hand-written models look at.
gender_indc, loan_rate_indc = (
    features.index(name) for name in ('Gender', 'LoanRateAsPercentOfIncome')
)

# From here on X is a plain array, so the names had to be captured first.
X = X.values

# Hold out 10% of the rows for testing.
# NOTE: no random_state is passed, so the split differs from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.1)

# Standardise both splits with statistics estimated on the training split only.
ss = StandardScaler().fit(xtrain)
xtrain, xtest = ss.transform(xtrain), ss.transform(xtest)

# Training-set mean of the (scaled) loan-rate column; used as the decision
# threshold of the innocuous model.
mean_lrpi = xtrain[:, loan_rate_indc].mean()

# Column indices of the features that are handed to the explainers as
# categorical.
categorical = [
    features.index(c)
    for c in [
        'Gender', 'ForeignWorker', 'Single', 'HasTelephone', 'CheckingAccountBalance_geq_0',
        'CheckingAccountBalance_geq_200', 'SavingsAccountBalance_geq_100', 'SavingsAccountBalance_geq_500',
        'MissedPayments', 'NoCurrentLoan', 'CriticalAccountOrLoansElsewhere', 'OtherLoansAtBank',
        'OtherLoansAtStore', 'HasCoapplicant', 'HasGuarantor', 'OwnsHouse', 'RentsHouse', 'Unemployed',
        'YearsAtCurrentJob_lt_1', 'YearsAtCurrentJob_geq_4', 'JobClassIsSkilled',
    ]
]

class racist_model_f:
    """Deliberately biased classifier that decides from a single column.

    Despite the class name, the column consulted here is ``Gender``
    (``gender_indc``): rows whose value in that column is below zero get
    ``params.negative_outcome``, all other rows get
    ``params.positive_outcome``.
    """

    def predict(self, X):
        """Return one hard label per row of ``X`` as a NumPy array."""
        labels = []
        for row in X:
            if row[gender_indc] < 0:
                labels.append(params.negative_outcome)
            else:
                labels.append(params.positive_outcome)
        return np.array(labels)

    def predict_proba(self, X):
        """Return the hard labels of ``predict`` passed through ``one_hot_encode``."""
        hard_labels = self.predict(X)
        return one_hot_encode(hard_labels)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        n_correct = np.sum(self.predict(X) == y)
        return n_correct / len(X)

class innocuous_model_psi:
    """Harmless-looking classifier that decides from the loan-rate column only."""

    def predict_proba(self, X):
        """Return one-hot encoded labels for the rows of ``X``.

        A row gets ``params.negative_outcome`` when its
        ``LoanRateAsPercentOfIncome`` value is above the training mean
        (``mean_lrpi``) and ``params.positive_outcome`` otherwise.
        """
        outcomes = []
        for row in X:
            above_mean = row[loan_rate_indc] > mean_lrpi
            outcomes.append(params.negative_outcome if above_mean else params.positive_outcome)
        return one_hot_encode(np.array(outcomes))

def experiment_main():
    """Run the adversarial LIME and Kernel SHAP experiments on the German data.

    For each explainer this trains an adversarial model that wraps the biased
    model (``racist_model_f``) and the innocuous model
    (``innocuous_model_psi``), explains every row of ``xtest``, and prints the
    summary returned by ``experiment_summary`` together with the adversarial
    model's fidelity on ``xtest``.

    Reads the module-level ``xtrain``, ``ytrain``, ``xtest``, ``features`` and
    ``categorical``. Returns nothing; all results are printed.
    """
    # KMeans is not imported explicitly anywhere in this notebook; it would
    # only resolve if one of the star imports happened to re-export it.
    # Import it here so this function does not depend on that accident.
    from sklearn.cluster import KMeans

    print('---------------------')
    print("Beginning LIME GERMAN Experiments....")
    print('---------------------')

    adv_lime = Adversarial_Lime_Model(racist_model_f(), innocuous_model_psi()).train(
        xtrain, ytrain, feature_names=features, perturbation_multiplier=30, categorical_features=categorical)
    adv_explainer = lime.lime_tabular.LimeTabularExplainer(
        xtrain, feature_names=adv_lime.get_column_names(), discretize_continuous=False, categorical_features=categorical)

    # One LIME explanation (list of (feature, weight) pairs) per test row.
    explanations = [
        adv_explainer.explain_instance(row, adv_lime.predict_proba).as_list()
        for row in xtest
    ]

    print("LIME Ranks and Pct Occurances (1 corresponds to most important feature) for one unrelated feature:")
    print(experiment_summary(explanations, features))
    print("Fidelity:", round(adv_lime.fidelity(xtest), 2))

    print('---------------------')
    print('Beginning SHAP GERMAN Experiments....')
    print('---------------------')

    # Ten k-means cluster centres of the training data serve as the SHAP
    # background set, both for training the adversarial model and for the
    # explainer itself.
    background_distribution = KMeans(n_clusters=10, random_state=0).fit(xtrain).cluster_centers_
    adv_shap = Adversarial_Kernel_SHAP_Model(racist_model_f(), innocuous_model_psi()).train(
        xtrain, ytrain, feature_names=features, background_distribution=background_distribution,
        rf_estimators=100, n_samples=5e4)
    adv_kernel_explainer = shap.KernelExplainer(adv_shap.predict, background_distribution)
    explanations = adv_kernel_explainer.shap_values(xtest)

    # Pair every SHAP value with its feature name so the explanations have the
    # same (feature, value) layout as the LIME ones passed to
    # experiment_summary above.
    formatted_explanations = [
        [(features[i], value) for i, value in enumerate(exp)]
        for exp in explanations
    ]

    print("SHAP Ranks and Pct Occurances one unrelated features:")
    print(experiment_summary(formatted_explanations, features))
    print("Fidelity:", round(adv_shap.fidelity(xtest), 2))
    print('---------------------')

if __name__ == "__main__":
    experiment_main()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hard-coded occurrence table plotted below as the "With LIME Attack" panel.
# Outer key: importance rank (1 = most important). Inner dict: feature name ->
# share of explanations in which that feature appeared at that rank.
# NOTE(review): presumably transcribed from one run of the experiment cell
# above - confirm before reusing, the numbers are not recomputed here.
lime_results = {
    1: {'LoanRateAsPercentOfIncome': 1.0},
    2: {
        'CheckingAccountBalance_geq_0': 0.02,
        'CheckingAccountBalance_geq_200': 0.06,
        'ForeignWorker': 0.23,
        'Gender': 0.02,
        'HasCoapplicant': 0.17,
        'HasGuarantor': 0.11,
        'HasTelephone': 0.01,
        'JobClassIsSkilled': 0.01,
        'MissedPayments': 0.05,
        'NoCurrentLoan': 0.11,
        'OtherLoansAtBank': 0.02,
        'RentsHouse': 0.01,
        'SavingsAccountBalance_geq_100': 0.02,
        'SavingsAccountBalance_geq_500': 0.05,
        'Single': 0.01,
        'Unemployed': 0.09,
        'YearsAtCurrentJob_lt_1': 0.01
    },
    3: {
        'CheckingAccountBalance_geq_0': 0.01,
        'CheckingAccountBalance_geq_200': 0.07,
        'CriticalAccountOrLoansElsewhere': 0.04,
        'ForeignWorker': 0.15,
        'HasCoapplicant': 0.11,
        'HasGuarantor': 0.15,
        'HasTelephone': 0.03,
        'MissedPayments': 0.06,
        'NoCurrentLoan': 0.09,
        'OtherLoansAtBank': 0.06,
        'OwnsHouse': 0.03,
        'RentsHouse': 0.01,
        'SavingsAccountBalance_geq_100': 0.02,
        'SavingsAccountBalance_geq_500': 0.09,
        'Unemployed': 0.04,
        'YearsAtCurrentJob_geq_4': 0.02,
        'YearsAtCurrentJob_lt_1': 0.02
    }
}

# Hard-coded occurrence table plotted below as the "With SHAP Attack" panel.
# Same layout as the LIME table: importance rank (1 = most important) ->
# {feature name: share of explanations with that feature at that rank}.
# NOTE(review): presumably transcribed from one run of the experiment cell
# above - confirm before reusing, the numbers are not recomputed here.
shap_results = {
    1: {
        'CheckingAccountBalance_geq_0': 0.01,
        'CheckingAccountBalance_geq_200': 0.02,
        'LoanRateAsPercentOfIncome': 0.9,
        'NumberOfLiableIndividuals': 0.06,
        'YearsAtCurrentJob_lt_1': 0.01
    },
    2: {
        'CheckingAccountBalance_geq_0': 0.01,
        'CheckingAccountBalance_geq_200': 0.12,
        'Gender': 0.46,
        'HasCoapplicant': 0.04,
        'HasGuarantor': 0.01,
        'NumberOfLiableIndividuals': 0.13,
        'OtherLoansAtBank': 0.11,
        'SavingsAccountBalance_geq_100': 0.01,
        'Unemployed': 0.1,
        'YearsAtCurrentJob_lt_1': 0.01
    },
    3: {
        'CheckingAccountBalance_geq_0': 0.05,
        'CheckingAccountBalance_geq_200': 0.13,
        'Gender': 0.01,
        'HasCoapplicant': 0.05,
        'HasGuarantor': 0.07,
        'HasTelephone': 0.01,
        'JobClassIsSkilled': 0.04,
        'LoanAmount': 0.02,
        'LoanDuration': 0.03,
        'MissedPayments': 0.03,
        'NumberOfLiableIndividuals': 0.18,
        'NumberOfOtherLoansAtBank': 0.02,
        'OtherLoansAtBank': 0.12,
        'OwnsHouse': 0.04,
        'RentsHouse': 0.02,
        'SavingsAccountBalance_geq_100': 0.02,
        'SavingsAccountBalance_geq_500': 0.01,
        'Single': 0.06,
        'Unemployed': 0.06,
        'YearsAtCurrentHome': 0.01,
        'YearsAtCurrentJob_geq_4': 0.01,
        'YearsAtCurrentJob_lt_1': 0.03
    }
}

# Feature groups singled out in the plot; every other feature is lumped
# together as "others".
protected_features = ['Gender']
unrelated_features = ['LoanRateAsPercentOfIncome']

def split_contributions(results_dict):
    """Split a rank -> {feature: share} table into three per-rank series.

    For each of the ranks 1, 2 and 3 the shares are added up separately for
    the protected features, the unrelated features and all remaining
    features. A rank missing from ``results_dict`` contributes zeros.

    Returns a tuple ``(protected, unrelated, others)`` of three lists, each
    holding one value per rank in rank order.
    """
    per_rank = []
    for rank in (1, 2, 3):
        shares = results_dict.get(rank, {})
        everything = guarded = decoy = 0
        for name in shares:
            share = shares[name]
            everything += share
            if name in protected_features:
                guarded += share
            elif name in unrelated_features:
                decoy += share
        # "Others" is whatever is left after removing the two named groups.
        per_rank.append((guarded, decoy, everything - guarded - decoy))

    # Transpose the per-rank triples into one list per group.
    protected, unrelated, others = (list(column) for column in zip(*per_rank))
    return protected, unrelated, others

# Formats the fractional shares (0-1) as percentages so the tick labels agree
# with the "% Occurrence" axis label.
from matplotlib.ticker import PercentFormatter

# Collapse each occurrence table into three per-rank series:
# protected feature, unrelated feature, everything else.
lime_prot, lime_unrel, lime_oth = split_contributions(lime_results)
shap_prot, shap_unrel, shap_oth = split_contributions(shap_results)

ranks = ['1st', '2nd', '3rd']
x = np.arange(len(ranks))
width = 0.25  # bar thickness; passed as `height` because the bars are horizontal

fig, axs = plt.subplots(1, 2, figsize=(10, 5), sharey=True)

colors = {'protected': '#d9534f', 'unrelated': '#0275d8', 'others': '#999999'}

# (axis, title, (protected, unrelated, others)) for each panel.
panels = [
    (axs[0], 'With LIME Attack', (lime_prot, lime_unrel, lime_oth)),
    (axs[1], 'With SHAP Attack', (shap_prot, shap_unrel, shap_oth)),
]
# (colour key, legend label) for each stacked segment, in stacking order.
segments = [
    ('protected', 'Gender'),
    ('unrelated', 'Loan Rate % Income'),
    ('others', 'All Others'),
]

for ax, title, series in panels:
    # Stack the three segments left to right, one bar per rank.
    left = np.zeros(len(ranks))
    for (color_key, legend_label), values in zip(segments, series):
        ax.barh(x, values, height=width, left=left, color=colors[color_key], label=legend_label)
        left = left + np.asarray(values, dtype=float)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('% Occurrence', fontsize=12)
    ax.xaxis.set_major_formatter(PercentFormatter(xmax=1.0))
    ax.set_yticks([])
    ax.grid(axis='x', linestyle='--', linewidth=0.5)

# Put the 1st rank at the top. The y axis is shared between the panels, so it
# must be inverted exactly once: inverting it on each panel flips it twice and
# restores the original bottom-up order.
axs[0].invert_yaxis()
axs[0].set_ylabel('Feature Importance Rank', fontsize=14)

# Rank labels are drawn by hand just left of the axis (in data coordinates)
# because the tick labels are switched off above.
for idx, label in enumerate(ranks):
    axs[0].text(-0.05, idx, label, va='center', ha='right', fontsize=12)

# A single figure-level legend; both panels use the same three segments, so
# the entries are taken from the first one only.
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=3, fontsize=12)

# Reserve the bottom 8% of the figure for the legend.
plt.tight_layout(rect=[0,0.08,1,1])
plt.show()
