# Bank

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fetch the bank dataset through SAGE's dataset helper.
df = sage.datasets.bank()

# Every column except the last is used as a feature name
# (presumably the last column is the prediction target -- confirm).
all_columns = df.columns.tolist()
feature_names = all_columns[:-1]

# Columns treated as categorical (for the CatBoost model), plus their
# positions within feature_names.
categorical_cols = [
    'Job', 'Marital', 'Education', 'Default', 'Housing',
    'Loan', 'Contact', 'Month', 'Prev Outcome',
]
categorical_inds = list(map(feature_names.index, categorical_cols))

In [None]:
# Load the previously saved SAGE result for the bank dataset.
# NOTE(review): the plotting cell reads `.values` and `.std` from any result
# that is not a NumPy array, which is presumably this object -- confirm.
sage_values = sage.load('results/bank_sage.pkl')

In [None]:
# Read the 'scores' entry from each of the 512 saved permutation-test runs
# and average them across runs (axis 0).
path_template = 'results/bank permutation_test {}.pkl'
run_scores = []
for run in range(512):
    with open(path_template.format(run), mode='rb') as fh:
        payload = pickle.load(fh)
    run_scores.append(payload['scores'])
permutation = np.mean(np.array(run_scores), axis=0)

In [None]:
# Unpickle the saved feature-ablation result for the bank dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/bank feature_ablation.pkl', mode='rb') as ablation_file:
    ablation = pickle.load(ablation_file)

In [None]:
# Unpickle the saved univariate-predictor result for the bank dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/bank univariate.pkl', mode='rb') as univariate_file:
    univariate = pickle.load(univariate_file)

In [None]:
# Pair each panel title with its result, then split the pairs into the two
# parallel lists consumed by the plotting cell (order = left-to-right panels).
panels = [
    ('SAGE', sage_values),
    ('Permutation Test', permutation),
    ('Feature Ablation', ablation),
    ('Univariate Predictors', univariate),
]
results = [scores for _, scores in panels]
results_names = [title for title, _ in panels]

In [None]:
# One bar-chart panel per importance method, laid out in a single row.
fig, axarr = plt.subplots(1, len(results), figsize=(16, 6))

for i, (ax, result, name) in enumerate(zip(axarr, results, results_names)):
    if isinstance(result, np.ndarray):
        # Plain array of scores: bars without error bars.
        heights = result
        error_kwargs = {}
    else:
        # SAGE result object: bars with error bars of 1.96 * std.
        heights = result.values
        error_kwargs = {'yerr': 1.96 * result.std, 'capsize': 5}

    positions = np.arange(len(heights))
    ax.bar(positions, heights, **error_kwargs)

    ax.set_title(name, fontsize=20)
    ax.set_xticks(positions)
    ax.set_xticklabels(feature_names, rotation=90, ha='center', fontsize=16)
    ax.tick_params('y', labelsize=16)

    # Only the left-most panel carries the y-axis label.
    if i == 0:
        ax.set_ylabel('SAGE Values', fontsize=18)

plt.tight_layout()
# Use plt.show() instead to preview without writing the file.
plt.savefig('figures/bank_sage.pdf')

# Bike

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fetch the bike dataset through SAGE's dataset helper.
df = sage.datasets.bike()

# Every column except the last three is used as a feature name
# (presumably the trailing columns are targets -- confirm).
all_columns = df.columns.tolist()
feature_names = all_columns[:-3]

In [None]:
# Load the previously saved SAGE result for the bike dataset.
# NOTE(review): the plotting cell reads `.values` and `.std` from any result
# that is not a NumPy array, which is presumably this object -- confirm.
sage_values = sage.load('results/bike_sage.pkl')

In [None]:
# Unpickle the saved mean-importance result for the bike dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/bike mean_importance.pkl', mode='rb') as mean_imp_file:
    mean_imp = pickle.load(mean_imp_file)

In [None]:
# Read the 'scores' entry from each of the 512 saved permutation-test runs
# and average them across runs (axis 0).
path_template = 'results/bike permutation_test {}.pkl'
run_scores = []
for run in range(512):
    with open(path_template.format(run), mode='rb') as fh:
        payload = pickle.load(fh)
    run_scores.append(payload['scores'])
permutation = np.mean(np.array(run_scores), axis=0)

In [None]:
# Unpickle the saved feature-ablation result for the bike dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/bike feature_ablation.pkl', mode='rb') as ablation_file:
    ablation = pickle.load(ablation_file)

In [None]:
# Unpickle the saved univariate-predictor result for the bike dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/bike univariate.pkl', mode='rb') as univariate_file:
    univariate = pickle.load(univariate_file)

In [None]:
# Pair each panel title with its result, then split the pairs into the two
# parallel lists consumed by the plotting cell (order = left-to-right panels).
panels = [
    ('SAGE', sage_values),
    ('Permutation Test', permutation),
    ('Mean Importance', mean_imp),
    ('Feature Ablation', ablation),
    ('Univariate Predictors', univariate),
]
results = [scores for _, scores in panels]
results_names = [title for title, _ in panels]

In [None]:
# One bar-chart panel per importance method, laid out in a single row.
# All plotted quantities are divided by 1000, matching the y-axis label.
fig, axarr = plt.subplots(1, len(results), figsize=(16, 5))

for i, (ax, result, name) in enumerate(zip(axarr, results, results_names)):
    if isinstance(result, np.ndarray):
        # Plain array of scores: bars without error bars.
        heights = result / 1000
        error_kwargs = {}
    else:
        # SAGE result object: bars with error bars of 1.96 * (std / 1000).
        heights = result.values / 1000
        error_kwargs = {'yerr': 1.96 * (result.std / 1000), 'capsize': 5}

    positions = np.arange(len(heights))
    ax.bar(positions, heights, **error_kwargs)

    ax.set_title(name, fontsize=20)
    ax.set_xticks(positions)
    ax.set_xticklabels(feature_names, rotation=90, ha='center', fontsize=16)
    ax.tick_params('y', labelsize=16)

    # Only the left-most panel carries the y-axis label.
    if i == 0:
        ax.set_ylabel(r'SAGE Values ($\times$1000)', fontsize=18)

plt.tight_layout()
# Use plt.show() instead to preview without writing the file.
plt.savefig('figures/bike_sage.pdf')

# Credit

In [None]:
import sage
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fetch the credit dataset through SAGE's dataset helper.
df = sage.datasets.credit()

# Every column except the last is used as a feature name
# (presumably the last column is the prediction target -- confirm).
all_columns = df.columns.tolist()
feature_names = all_columns[:-1]

# Columns treated as categorical (for the CatBoost model), plus their
# positions within feature_names.
# NOTE(review): 'Credit Amount' is listed as categorical here although its
# name suggests a numeric quantity -- confirm this is intended.
categorical_columns = [
    'Checking Status',
    'Credit History',
    'Purpose',
    'Credit Amount',
    'Savings Account/Bonds',
    'Employment Since',
    'Personal Status',
    'Debtors/Guarantors',
    'Property Type',
    'Other Installment Plans',
    'Housing Ownership',
    'Job',
    'Telephone',
    'Foreign Worker',
]
categorical_inds = list(map(feature_names.index, categorical_columns))

In [None]:
# Load the previously saved SAGE result for the credit dataset.
# NOTE(review): the plotting cell reads `.values` and `.std` from any result
# that is not a NumPy array, which is presumably this object -- confirm.
sage_values = sage.load('results/credit_sage.pkl')

In [None]:
# Read the 'scores' entry from each of the 512 saved permutation-test runs
# and average them across runs (axis 0).
path_template = 'results/credit permutation_test {}.pkl'
run_scores = []
for run in range(512):
    with open(path_template.format(run), mode='rb') as fh:
        payload = pickle.load(fh)
    run_scores.append(payload['scores'])
permutation = np.mean(np.array(run_scores), axis=0)

In [None]:
# Unpickle the saved feature-ablation result for the credit dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/credit feature_ablation.pkl', mode='rb') as ablation_file:
    ablation = pickle.load(ablation_file)

In [None]:
# Unpickle the saved univariate-predictor result for the credit dataset.
# NOTE(review): the plotting cell draws plain bars only for NumPy arrays, so
# this is presumably an ndarray -- confirm.
with open('results/credit univariate.pkl', mode='rb') as univariate_file:
    univariate = pickle.load(univariate_file)

In [None]:
# Pair each panel title with its result, then split the pairs into the two
# parallel lists consumed by the plotting cell (order = left-to-right panels).
panels = [
    ('SAGE', sage_values),
    ('Permutation Test', permutation),
    ('Feature Ablation', ablation),
    ('Univariate Predictors', univariate),
]
results = [scores for _, scores in panels]
results_names = [title for title, _ in panels]

In [None]:
# One bar-chart panel per importance method, laid out in a single row.
fig, axarr = plt.subplots(1, len(results), figsize=(16, 6.5))

for i, (ax, result, name) in enumerate(zip(axarr, results, results_names)):
    if isinstance(result, np.ndarray):
        # Plain array of scores: bars without error bars.
        heights = result
        error_kwargs = {}
    else:
        # SAGE result object: bars with error bars of 1.96 * std.
        heights = result.values
        error_kwargs = {'yerr': 1.96 * result.std, 'capsize': 5}

    positions = np.arange(len(heights))
    ax.bar(positions, heights, **error_kwargs)

    ax.set_title(name, fontsize=20)
    ax.set_xticks(positions)
    ax.set_xticklabels(feature_names, rotation=90, ha='center', fontsize=14)
    ax.tick_params('y', labelsize=16)

    # Only the left-most panel carries the y-axis label.
    if i == 0:
        ax.set_ylabel('SAGE Values', fontsize=18)

plt.tight_layout()
# Use plt.show() instead to preview without writing the file.
plt.savefig('figures/credit_sage.pdf')