# Load data and results

In [1]:
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def load_results(filename):
    """Return the object stored in the pickle file at ``filename``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only point this at files
    produced by a trusted source.
    """
    with open(filename, mode='rb') as handle:
        return pickle.load(handle)

def save_results(results, filename):
    """Pickle ``results`` into the file at ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. Nothing is returned.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(results, sink)

In [2]:
# Load the four pickled artifacts this notebook reads, in a fixed order.
# Paths are relative to the notebook's working directory.
results, results_grid, best_models, data_models = map(
    load_results,
    (
        'result/results_test.pickle',
        'result/results_grid.pickle',
        'result/best_models.pickle',
        'result/data_models.pickle',
    ),
)

### SHAP values

In [4]:
import shap
from sklearn.preprocessing import StandardScaler

# Display names of the tuned models. Each one is looked up in `best_models`
# under the key "Model_<i_model>_<name>".
models = ['Logistic', 'Random Forest', 'GBM', 'XGBoost', 'SVM', 'KNN']

# Model name -> {'shap_values': ..., 'shap_data': ...} for the model set that
# was processed last. Defined here so the name exists even if every set is skipped.
results_shap = {}

i_models = [1, 2]

for i_model in i_models:

    # Only model set 2 is computed by this cell; every other entry is skipped.
    # NOTE(review): presumably set 1 was produced by an earlier run - confirm
    # before relying on result/results_shap-1.pickle.
    if i_model != 2:
        continue

    print(f"Computing SHAP values for {i_model}...")

    X_train, X_test, y_train, y_test = data_models[f"Model_{i_model}"]

    # Standardise with statistics fitted on the training split only, then put
    # the column names back so the SHAP output stays labelled by feature.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    df_X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns)
    df_X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns)

    # shap_train is the background data, shap_test the rows being explained.
    shap_train = df_X_train_scaled.copy()
    shap_test = df_X_test_scaled.copy()

    # Start from an empty dict for each model set so a pickle can never carry
    # entries that belong to a previously processed set.
    results_shap = {}

    # To debug a single model, temporarily shorten `models` instead of
    # editing this loop.
    for model in models:

        print(f"Computing SHAP values for {model}...")

        best_model = best_models[f"Model_{i_model}_{model}"]

        if model == 'Logistic':
            explainer = shap.LinearExplainer(best_model.named_steps['classifier'], shap_train)
            shap_values = explainer(shap_test)

        elif model == 'Random Forest':
            explainer = shap.TreeExplainer(best_model.named_steps['rf'])
            shap_values = explainer(shap_test)
            # Keep only the last slice of the third axis.
            # NOTE(review): presumably the positive-class output - confirm.
            shap_values = shap_values[:,:,-1]

        elif model == 'GBM':
            explainer = shap.TreeExplainer(best_model.named_steps['gb'])
            shap_values = explainer(shap_test)

        elif model == 'XGBoost':
            explainer = shap.TreeExplainer(best_model.named_steps['xgb'])
            shap_values = explainer(shap_test)

        elif model in ('SVM', 'KNN'):
            # Both models are explained through predict_proba with the
            # permutation explainer. This is slow: the recorded run took
            # about 1.5 h for SVM and about 50 min for KNN.
            # NOTE(review): `best_model` here is the whole pipeline, whereas
            # the branches above hand the same pre-scaled data to the final
            # estimator only. If the pipeline has its own scaling step the
            # inputs are standardised twice - confirm against the pipeline
            # definition.
            explainer = shap.explainers.Permutation(best_model.predict_proba, shap_train.values)
            shap_values = explainer(shap_test)
            # Same last-slice selection as for the random forest.
            shap_values = shap_values[:,:,-1]

        else:
            # Without this guard an unknown name would silently store the
            # previous model's SHAP values under the new key.
            raise ValueError(f"No SHAP explainer configured for model {model!r}")

        results_shap[model] = {
            'shap_values': shap_values,
            'shap_data': shap_test,
        }

    save_results(results_shap, f"result/results_shap-{i_model}.pickle")

Computing SHAP values for 2...
Computing SHAP values for Logistic...
Computing SHAP values for Random Forest...
Computing SHAP values for GBM...
Computing SHAP values for XGBoost...
Computing SHAP values for SVM...


PermutationExplainer explainer: 1136it [1:36:19,  5.10s/it]                          


Computing SHAP values for KNN...


PermutationExplainer explainer: 1136it [48:30,  2.57s/it]                          
