In [37]:
# Third-party and stdlib imports used by the analysis cells; pickle is used
# to load the saved experiment results.
import numpy as np
import pickle
import pathlib
from matplotlib.ticker import ScalarFormatter
import matplotlib.pyplot as plt

import os
from os.path import join
# Despite the name, this is the resolved current working directory
# (pathlib.Path() is "."), not the location of this notebook file.
path_to_file = str(pathlib.Path().resolve())
# Two levels above the working directory; later cells build the HelperFiles
# and Experiments/Results paths from it.
dir_path = join(path_to_file, "../../")
import warnings
# Silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

import sys
# Make the modules in <dir_path>/HelperFiles importable.
sys.path.append(join(dir_path, "HelperFiles"))
# from load_data import *
# NOTE(review): the wildcard import hides which names come from helper;
# calc_fwer, used by the cells below, is presumably one of them -- confirm.
from helper import *

# SHAP Results
## Failure cases for the maximum FWER occur at input indices that converge infrequently

In [38]:
# Print arrays with three decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

In [39]:
datasets = ["census", "bank", "brca", "credit", "breast_cancer"]
Ks = [2,5]
methods = ["rankshap", "sprtshap"]
guarantees = ["rank", "set"]

# An input only counts toward the maximum when it has at least this many
# rejection entries (presumably 90% of 50 runs per input -- TODO confirm).
SHAP_MIN_REJECTIONS = 50*0.9
# Result files with fewer qualifying inputs than this are reported and left as NaN.
SHAP_MIN_INPUTS = 10


def _shap_max_fwer_pct(data_dir, fname, guarantee, alpha):
    """Return the largest per-input FWER in one result file, as a rounded percent.

    Parameters
    ----------
    data_dir : str
        Directory holding the pickled result files.
    fname : str
        Name of the result file inside ``data_dir``.
    guarantee, alpha
        Only used to label the diagnostic messages.

    Returns
    -------
    float
        ``round(max FWER * 100)``, or ``nan`` (after printing the reason) when
        the file is missing, cannot be processed, or has fewer than
        ``SHAP_MIN_INPUTS`` inputs passing the rejection-count filter.
    """
    path = join(data_dir, fname)
    if not os.path.exists(path):
        print("File not found: ", fname, guarantee, alpha)
        return np.nan

    # NOTE: pickle.load can execute arbitrary code; only point this at
    # result files produced by this project.
    with open(path, "rb") as fp:
        results = pickle.load(fp)

    try:
        top_K_all = results['top_K']
        rejection_idx = results['rejection_idx']
        fwers_all = np.array([calc_fwer(top_K, rejection_idx=idx) for top_K, idx in zip(top_K_all, rejection_idx)])
        # Explicit boolean mask so an empty rejection list still indexes cleanly.
        keep = np.array([len(idx) >= SHAP_MIN_REJECTIONS for idx in rejection_idx], dtype=bool)
        fwers = fwers_all[keep]
    except Exception as exc:
        # Files in an older layout end up here. Unlike a bare ``except`` this
        # lets KeyboardInterrupt through and reports what actually went wrong.
        print("OUTDATED: ", fname, guarantee, alpha, f"({type(exc).__name__}: {exc})")
        return np.nan

    if len(fwers) < SHAP_MIN_INPUTS:
        print(f"Only ran on {len(fwers)} inputs: ", fname, guarantee, alpha)
        return np.nan
    return np.round(np.max(fwers)*100).item()


for method in methods:
    for K in Ks:
        for alpha in [0.1, 0.2]:
            # One row per guarantee, one column per dataset. The row count must
            # follow `guarantees` (it was previously sized with len(Ks), which
            # only worked because both lists happen to have two entries).
            max_mat = np.full((len(guarantees), len(datasets)), np.nan)
            for i, guarantee in enumerate(guarantees):
                data_dir = join(dir_path, "Experiments", "Results", "Top_K", guarantee, "alpha_"+str(alpha))
                for j, dataset in enumerate(datasets):
                    fname = method + "_" + dataset + "_K" + str(K)
                    max_mat[i, j] = _shap_max_fwer_pct(data_dir, fname, guarantee, alpha)
            print("#"*20)
            print("Method: ", method, "\tK:", str(K), "\tAlpha:", alpha)

            print("Max FWER (%)")
            print(max_mat)

Only ran on 8 inputs:  rankshap_breast_cancer_K2 rank 0.1
####################
Method:  rankshap 	K: 2 	Alpha: 0.1
Max FWER (%)
[[ 8.  6.  6.  4. nan]
 [ 2.  2.  6.  2.  4.]]
####################
Method:  rankshap 	K: 2 	Alpha: 0.2
Max FWER (%)
[[16. 14. 14.  8.  6.]
 [16.  0. 10.  2. 12.]]
File not found:  rankshap_breast_cancer_K5 rank 0.1
####################
Method:  rankshap 	K: 5 	Alpha: 0.1
Max FWER (%)
[[ 6. 10. 10.  4. nan]
 [ 6. 10. 10.  4.  4.]]
File not found:  rankshap_breast_cancer_K5 rank 0.2
####################
Method:  rankshap 	K: 5 	Alpha: 0.2
Max FWER (%)
[[14. 24. 20. 12. nan]
 [14. 16. 20. 16.  4.]]
Only ran on 0 inputs:  sprtshap_brca_K2 rank 0.1
Only ran on 1 inputs:  sprtshap_brca_K2 set 0.1
####################
Method:  sprtshap 	K: 2 	Alpha: 0.1
Max FWER (%)
[[ 0.  0. nan  0.  0.]
 [ 0.  0. nan  0.  0.]]
Only ran on 0 inputs:  sprtshap_brca_K2 rank 0.2
Only ran on 1 inputs:  sprtshap_brca_K2 set 0.2
####################
Method:  sprtshap 	K: 2 	Alpha: 0.2
Ma

In [42]:
# Drill into one configuration (RankSHAP, breast_cancer, K=2, rank guarantee,
# alpha=0.1) and show the per-input FWERs that survive the rejection-count filter.
method, dataset, K = "rankshap", "breast_cancer", 2
guarantee, alpha = "rank", 0.1
data_dir = join(dir_path, "Experiments", "Results", "Top_K", guarantee, f"alpha_{alpha}")
fname = f"{method}_{dataset}_K{K}"
path = join(data_dir, fname)

with open(path, "rb") as fp:
    results = pickle.load(fp)

top_K_all, rejection_idx = results['top_K'], results['rejection_idx']
fwers_all = np.array([
    calc_fwer(tk, rejection_idx=rej)
    for tk, rej in zip(top_K_all, rejection_idx)
])
# Keep only inputs with at least 50*0.9 rejection entries.
relevant_idx = list(map(lambda rej: len(rej) >= 50*0.9, rejection_idx))
fwers = fwers_all[relevant_idx]
fwers

array([0.  , 0.  , 0.  , 0.02, 0.  , 0.  , 0.  , 0.  ])

## LIME Results

In [36]:
method = "lime"
guarantee = "rank"
# Only result files covering exactly this many inputs are summarised;
# anything else stays NaN in the tables.
LIME_N_INPUTS = 30
# An input only counts when it has more than this many rejection entries.
# NOTE(review): the SHAP cell uses >= 50*0.9 instead -- confirm the
# difference is intentional.
LIME_MIN_REJECTIONS = 50*0.95

# Relies on `Ks` and `datasets` defined in the SHAP results cell above.
for alpha in [0.1, 0.2]:
    # Rows follow Ks, columns follow datasets; NaN marks "no usable result".
    avg_mat = np.full((len(Ks), len(datasets)), np.nan)
    control_mat = np.full((len(Ks), len(datasets)), np.nan)  # never filled in this cell
    max_mat = np.full((len(Ks), len(datasets)), np.nan)
    data_dir = join(dir_path, "Experiments", "Results", "Top_K", guarantee, "alpha_"+str(alpha))
    for i, K in enumerate(Ks):
        for j, dataset in enumerate(datasets):
            fname = method + "_" + dataset + "_K" + str(K)
            path = join(data_dir, fname)
            if not os.path.exists(path):
                continue
            # NOTE: pickle.load can execute arbitrary code; only load
            # result files produced by this project.
            with open(path, "rb") as fp:
                results = pickle.load(fp)
            top_K_all = results['top_K']
            if top_K_all.shape[0] != LIME_N_INPUTS:
                continue
            rejection_idx = results['rejection_idx']
            fwers_all = np.array([calc_fwer(top_K, rejection_idx=idx) for top_K, idx in zip(top_K_all, rejection_idx)])
            relevant_idx = [len(idx) > LIME_MIN_REJECTIONS for idx in rejection_idx]
            fwers = fwers_all[relevant_idx]
            if len(fwers) == 0:
                # np.max raises ValueError on an empty array; leave the cell
                # as NaN when no input passes the rejection-count filter.
                continue

            avg_fwer = np.mean(fwers)
            max_fwer = np.max(fwers)
            max_mat[i, j] = np.round(max_fwer*100, 1)
            avg_mat[i, j] = np.round(avg_fwer*100, 1)

    print("#"*20)
    print("Method: LIME", "\tAlpha: ", alpha)

    print("Max FWER (%)")
    print(max_mat)

    # avg_mat holds the matching average FWER (%) if it is needed.


####################
Method: LIME 	Alpha:  0.1
Max FWER (%)
[[ 0.  2.  2.  6. nan]
 [nan  0. nan nan nan]]
####################
Method: LIME 	Alpha:  0.2
Max FWER (%)
[[ 0.  2.  2.  8. nan]
 [nan  0. nan nan nan]]


In [33]:
# Inspect the per-input FWERs of a single LIME result file
# (brca, K=5, rank guarantee, alpha=0.1).
method, dataset, K = "lime", "brca", 5
guarantee, alpha = "rank", 0.1
data_dir = join(dir_path, "Experiments", "Results", "Top_K", guarantee, f"alpha_{alpha}")
fname = f"{method}_{dataset}_K{K}"
path = join(data_dir, fname)
if os.path.exists(path):
    with open(path, "rb") as fp:
        results = pickle.load(fp)
    top_K_all, rejection_idx = results['top_K'], results['rejection_idx']
    fwers_all = np.array([
        calc_fwer(tk, rejection_idx=rej)
        for tk, rej in zip(top_K_all, rejection_idx)
    ])
    # Looser cut-off than the summary cells: more than 50*0.8 rejection entries.
    relevant_idx = list(map(lambda rej: len(rej) > 50*0.8, rejection_idx))
    fwers = fwers_all[relevant_idx]

    print(fwers)
# Left disabled: look up which input produced the largest FWER.
# idx_of_failure = np.argmax(fwers)
# x_idx_failure = results['x_indices'][idx_of_failure]
# print(x_idx_failure)
# Echo the resolved path as the cell's output, whether or not the file exists.
path

'/Users/jeremygoldwasser/Desktop/RankSHAP/Experiments/Analysis/../../Experiments/Results/Top_K/rank/alpha_0.1/lime_brca_K5'

In [95]:
# Check whether the stored top-K arrays for LIME on "credit" are identical
# across the two alpha levels.
#
# `guarantee` and `K` used to be inherited from whichever cell ran last.
# They are pinned here to the values they hold when the notebook is run top
# to bottom, so the result no longer depends on execution order.
guarantee = "rank"
K = 5
alphas = [0.1, 0.2]
arrs = []
method = "lime"
dataset = "credit"
fname = method + "_" + dataset + "_K" + str(K)
for alpha in alphas:
    data_dir = join(dir_path, "Experiments", "Results", "Top_K", guarantee, "alpha_"+str(alpha))
    path = join(data_dir, fname)
    # NOTE: pickle.load can execute arbitrary code; only load result files
    # produced by this project.
    with open(path, "rb") as fp:
        results = pickle.load(fp)
    top_K_all = results['top_K']
    arrs.append(top_K_all)
np.array_equal(arrs[0], arrs[1])

True