# Bank

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Reference SAGE values for the bank dataset; the convergence cells below
# measure squared error and correlation against these values.
bank_explanation = sage.load('results/bank_sage.pkl')
sage_final = bank_explanation.values

In [None]:
# Load the pickled SAGE convergence results for the bank dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/bank sage convergence.pkl', 'rb') as results_file:
    sage_results = pickle.load(results_file)

In [None]:
# Load the pickled SHAP convergence results for the bank dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/bank shap convergence.pkl', 'rb') as results_file:
    shap_results = pickle.load(results_file)

In [None]:
# SAGE convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of sage_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file)
# and compare the average against the reference values in `sage_final`.
num_average = 2 ** np.arange(10)  # subset sizes 1, 2, 4, ..., 512
tries = 5000                      # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
sage_iters = num_average * sage_results['evals'] * sage_results['inner_samples']
sage_mse, sage_corr = [], []
sage_mse_std, sage_corr_std = [], []

sage_runs = sage_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            sage_runs[np.random.choice(len(sage_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    sage_mse.append(np.mean(mse))
    sage_corr.append(np.mean(corr))
    sage_mse_std.append(np.std(mse))
    sage_corr_std.append(np.std(corr))

In [None]:
# SHAP convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of shap_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file).
# Note that the comparison target is still `sage_final`.
num_average = 2 ** np.arange(10)  # subset sizes 1, 2, 4, ..., 512
tries = 5000                      # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
shap_iters = num_average * shap_results['evals'] * shap_results['inner_samples']
shap_mse, shap_corr = [], []
shap_mse_std, shap_corr_std = [], []

shap_runs = shap_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            shap_runs[np.random.choice(len(shap_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    shap_mse.append(np.mean(mse))
    shap_corr.append(np.mean(corr))
    shap_mse_std.append(np.std(mse))
    shap_corr_std.append(np.std(corr))

In [None]:
# Two-panel figure comparing SAGE and SHAP convergence on the bank dataset:
# the left panel plots the squared-error series, the right panel the
# correlation series, both against a log-scaled x-axis.
fig, axarr = plt.subplots(1, 2, figsize=(16, 5.5))

# Per panel: (SAGE series, SHAP series, y-label, title, legend location)
panel_specs = [
    (sage_mse, shap_mse, 'Mean Squared Error',
     'Bank MSE Convergence', 'upper left'),
    (sage_corr, shap_corr, 'Correlation',
     'Bank Correlation Convergence', 'upper left'),
]

for ax, (sage_y, shap_y, y_label, panel_title, legend_loc) in zip(axarr, panel_specs):
    ax.plot(sage_iters, sage_y, label='SAGE', color='tab:pink', marker='o')
    ax.plot(shap_iters, shap_y, label='SHAP', color='tab:blue', marker='o')

    ax.tick_params(labelsize=16)
    ax.legend(loc=legend_loc, fontsize=18, framealpha=1)
    ax.set_xlabel('Model Evaluations', fontsize=18)
    ax.set_ylabel(y_label, fontsize=18)
    ax.set_title(panel_title, fontsize=20)
    ax.set_xscale('log')

# Disabled y-limits carried over from the original cell, kept for reference:
#   MSE panel (-0.025, 0.29); correlation panel (0.1, 1.1)

fig.tight_layout()
# plt.show() is left disabled; the figure is written to disk.
fig.savefig('figures/bank_convergence.pdf')

# Bike

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Reference SAGE values for the bike dataset; the convergence cells below
# measure squared error and correlation against these values.
bike_explanation = sage.load('results/bike_sage.pkl')
sage_final = bike_explanation.values

In [None]:
# Load the pickled SAGE convergence results for the bike dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/bike sage convergence.pkl', 'rb') as results_file:
    sage_results = pickle.load(results_file)

In [None]:
# Load the pickled SHAP convergence results for the bike dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/bike shap convergence.pkl', 'rb') as results_file:
    shap_results = pickle.load(results_file)

In [None]:
# SAGE convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of sage_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file)
# and compare the average against the reference values in `sage_final`.
num_average = 2 ** np.arange(10)  # subset sizes 1, 2, 4, ..., 512
tries = 5000                      # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
sage_iters = num_average * sage_results['evals'] * sage_results['inner_samples']
sage_mse, sage_corr = [], []
sage_mse_std, sage_corr_std = [], []

sage_runs = sage_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            sage_runs[np.random.choice(len(sage_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    sage_mse.append(np.mean(mse))
    sage_corr.append(np.mean(corr))
    sage_mse_std.append(np.std(mse))
    sage_corr_std.append(np.std(corr))

In [None]:
# SHAP convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of shap_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file).
# Note that the comparison target is still `sage_final`.
num_average = 2 ** np.arange(10)  # subset sizes 1, 2, 4, ..., 512
tries = 5000                      # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
shap_iters = num_average * shap_results['evals'] * shap_results['inner_samples']
shap_mse, shap_corr = [], []
shap_mse_std, shap_corr_std = [], []

shap_runs = shap_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            shap_runs[np.random.choice(len(shap_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    shap_mse.append(np.mean(mse))
    shap_corr.append(np.mean(corr))
    shap_mse_std.append(np.std(mse))
    shap_corr_std.append(np.std(corr))

In [None]:
# Two-panel figure comparing SAGE and SHAP convergence on the bike dataset:
# the left panel plots the squared-error series, the right panel the
# correlation series, both against a log-scaled x-axis.
fig, axarr = plt.subplots(1, 2, figsize=(16, 5.5))

# Per panel: (SAGE series, SHAP series, y-label, title, legend location)
panel_specs = [
    (sage_mse, shap_mse, 'Mean Squared Error',
     'Bike MSE Convergence', 'upper right'),
    (sage_corr, shap_corr, 'Correlation',
     'Bike Correlation Convergence', 'lower right'),
]

for ax, (sage_y, shap_y, y_label, panel_title, legend_loc) in zip(axarr, panel_specs):
    ax.plot(sage_iters, sage_y, label='SAGE', color='tab:pink', marker='o')
    ax.plot(shap_iters, shap_y, label='SHAP', color='tab:blue', marker='o')

    ax.tick_params(labelsize=16)
    ax.legend(loc=legend_loc, fontsize=18, framealpha=1)
    ax.set_xlabel('Model Evaluations', fontsize=18)
    ax.set_ylabel(y_label, fontsize=18)
    ax.set_title(panel_title, fontsize=20)
    ax.set_xscale('log')

# Disabled y-limits carried over from the original cell, kept for reference:
#   MSE panel (-0.025, 0.29); correlation panel (0.1, 1.1)

fig.tight_layout()
# plt.show() is left disabled; the figure is written to disk.
fig.savefig('figures/bike_convergence.pdf')

# Credit

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Reference SAGE values for the credit dataset; the convergence cells below
# measure squared error and correlation against these values.
credit_explanation = sage.load('results/credit_sage.pkl')
sage_final = credit_explanation.values

In [None]:
# Load the pickled SAGE convergence results for the credit dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/credit sage convergence.pkl', 'rb') as results_file:
    sage_results = pickle.load(results_file)

In [None]:
# Load the pickled SHAP convergence results for the credit dataset. Later
# cells read its 'values', 'evals' and 'inner_samples' entries.
with open('results/credit shap convergence.pkl', 'rb') as results_file:
    shap_results = pickle.load(results_file)

In [None]:
# SAGE convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of sage_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file)
# and compare the average against the reference values in `sage_final`.
num_average = 2 ** np.arange(10)  # subset sizes 1, 2, 4, ..., 512
tries = 5000                      # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
sage_iters = num_average * sage_results['evals'] * sage_results['inner_samples']
sage_mse, sage_corr = [], []
sage_mse_std, sage_corr_std = [], []

sage_runs = sage_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            sage_runs[np.random.choice(len(sage_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    sage_mse.append(np.mean(mse))
    sage_corr.append(np.mean(corr))
    sage_mse_std.append(np.std(mse))
    sage_corr_std.append(np.std(corr))

In [None]:
# SHAP convergence experiment: for each subset size, repeatedly average that
# many randomly chosen rows of shap_results['values'] (presumably one
# independent run per row -- confirm against the script that wrote the file).
# Note that the comparison target is still `sage_final`, and that this cell
# uses a shorter range of subset sizes than the SAGE cell for this dataset.
num_average = 2 ** np.arange(7)  # subset sizes 1, 2, 4, ..., 64
tries = 5000                     # random subsets drawn per subset size
# X-axis values for the plot: subset size scaled by the stored 'evals' and
# 'inner_samples' entries.
shap_iters = num_average * shap_results['evals'] * shap_results['inner_samples']
shap_mse, shap_corr = [], []
shap_mse_std, shap_corr_std = [], []

shap_runs = shap_results['values']
for num in num_average:
    # One averaged estimate per try; each try picks `num` distinct rows.
    estimates = [
        np.mean(
            shap_runs[np.random.choice(len(shap_runs), size=num, replace=False)],
            axis=0)
        for _ in range(tries)
    ]

    # Squared error summed over all entries, and Pearson correlation,
    # both measured against the reference values.
    mse = [np.sum((est - sage_final) ** 2) for est in estimates]
    corr = [np.corrcoef(est, sage_final)[0, 1] for est in estimates]

    # Mean and standard deviation across tries for this subset size.
    shap_mse.append(np.mean(mse))
    shap_corr.append(np.mean(corr))
    shap_mse_std.append(np.std(mse))
    shap_corr_std.append(np.std(corr))

In [None]:
# Two-panel figure comparing SAGE and SHAP convergence on the credit dataset:
# the left panel plots the squared-error series, the right panel the
# correlation series, both against a log-scaled x-axis.
fig, axarr = plt.subplots(1, 2, figsize=(16, 5.5))

# Per panel: (SAGE series, SHAP series, y-label, title, legend location)
panel_specs = [
    (sage_mse, shap_mse, 'Mean Squared Error',
     'Credit MSE Convergence', 'upper right'),
    (sage_corr, shap_corr, 'Correlation',
     'Credit Correlation Convergence', 'lower right'),
]

for ax, (sage_y, shap_y, y_label, panel_title, legend_loc) in zip(axarr, panel_specs):
    ax.plot(sage_iters, sage_y, label='SAGE', color='tab:pink', marker='o')
    ax.plot(shap_iters, shap_y, label='SHAP', color='tab:blue', marker='o')

    ax.tick_params(labelsize=16)
    ax.legend(loc=legend_loc, fontsize=18, framealpha=1)
    ax.set_xlabel('Model Evaluations', fontsize=18)
    ax.set_ylabel(y_label, fontsize=18)
    ax.set_title(panel_title, fontsize=20)
    ax.set_xscale('log')

# Disabled y-limits carried over from the original cell, kept for reference:
#   MSE panel (-0.025, 0.29); correlation panel (0.1, 1.1)

fig.tight_layout()
# plt.show() is left disabled; the figure is written to disk.
fig.savefig('figures/credit_convergence.pdf')