In [45]:
import lore

from prepare_dataset import *
from neighbor_generator import *

from sklearn.model_selection import train_test_split

# Suppress every Python warning for the remainder of the session.
# NOTE(review): `warnings` is never imported explicitly in this cell; presumably it is
# brought into scope by one of the star imports above -- confirm, or import it directly.
warnings.filterwarnings("ignore")

In [46]:
# --- Data / model setup -------------------------------------------------------
# Loads the train and test splits, standardises them, adds Gaussian noise to the
# test features, loads the persisted MLP black box and labels the noisy records.
from sklearn.preprocessing import StandardScaler
import joblib

# Root folder of the breast-cancer-wisconsin dataset; every dataset file below is
# resolved relative to it so the location is defined in exactly one place.
path_data = '../../dataset/breast-cancer-wisconsin/'

dataset_name = 'breast-cancer-wisconsin.csv'
# Dataset descriptor consumed by lore.explain; it exposes at least the
# 'possible_outcomes' entry used at the end of this cell.
dataset = prepare_breast_dataset(dataset_name, path_data)

# Training split: features and the 'diagnosis' target.
df_train = pd.read_csv(path_data + 'train_data.csv')
X_train = df_train.drop('diagnosis', axis=1)
y_train = df_train['diagnosis']

# Test split to be explained.
df_test = pd.read_csv(path_data + 'test_case/test_case_mlp_lower.csv')
X_test = df_test.drop('diagnosis', axis=1)
y_test = df_test['diagnosis']

# Keep the column labels now: X_test is rebound to the scaler output below.
feature_names = X_test.columns

test_row_count = df_test.shape[0]

# Fit the scaler on the training features only, then apply it to the test features.
transform = StandardScaler()
X_train = transform.fit_transform(X_train)
X_test = transform.transform(X_test)

# Perturb the standardised test features with zero-mean Gaussian noise.
# The global NumPy seed is fixed so the same noise is drawn on every run.
noise_std = 0.2
np.random.seed(0)
X_test_noisy = X_test + noise_std * np.random.randn(*X_test.shape)

# NOTE(security): joblib.load unpickles the file and can execute arbitrary code;
# only load model files that come from a trusted source.
blackbox = joblib.load('../../saved_model/breast-cancer-wisconsin/MLP.pkl')

# Records to explain and the black-box predictions for them, mapped through
# dataset['possible_outcomes'] (each prediction is used as an index/key into it).
X2E = X_test_noisy
y2E = blackbox.predict(X2E)
y2E = np.asarray([dataset['possible_outcomes'][i] for i in y2E])

In [47]:
# Display the number of rows in the test split (df_test.shape[0]).
test_row_count

311

In [48]:
# Generate one LORE explanation per record of X2E.
#
# `explanations` is kept index-aligned with the record index: entry i is either the
# (explanation, infos) pair for record i, or None when no explanation is available
# (lore.explain raised, or it returned an empty/falsy explanation). Later cells index
# this list by record number, so exactly one entry must be appended per iteration.
explanations = []

for idx_record2explain in range(test_row_count):
    try:
        explanation, infos = lore.explain(idx_record2explain, X2E, dataset, blackbox,
                                          ng_function=genetic_neighborhood,
                                          discrete_use_probabilities=True,
                                          continuous_function_estimation=False,
                                          returns_infos=True,
                                          path=path_data, sep=';', log=False)
        # A falsy explanation is recorded as None instead of being skipped, so the
        # list never falls out of step with idx_record2explain.
        explanations.append((explanation, infos) if explanation else None)
    except Exception as e:
        # Best effort: report the failure and keep going with a None placeholder.
        print(f"Error generating explanation for idx_record2explain {idx_record2explain}: {e}")
        explanations.append(None)

idx_record2explain: 0
idx_record2explain: 1
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
idx_record2explain: 2
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generated. Trying again...
No 'ss' samples have been generat

In [49]:
import os

# Collect the noisy test records for which no explanation exists and store them
# (label column first, then features) in a CSV file.
file_path = '../../dataset/breast-cancer-wisconsin/test_case/unexplainable_sample_mlp_lower.csv'
df_test_noisy = pd.DataFrame(X_test_noisy, columns=feature_names)

# Row positions whose entry in `explanations` is the None placeholder.
nonelist = [
    idx_record2explain
    for idx_record2explain in range(test_row_count)
    if explanations[idx_record2explain] is None
]

fail = df_test_noisy.loc[nonelist]
y_fail = y_test.loc[nonelist]

# Create the file with a header the first time; afterwards append rows without one.
# NOTE: because of the append mode, re-running this cell adds the same rows again.
file_exists = os.path.isfile(file_path)
pd.concat([y_fail, fail], axis=1).to_csv(
    file_path,
    mode='a' if file_exists else 'w',
    index=False,
    header=not file_exists,
)

In [50]:
# Save the noisy test set (label column followed by the noisy features) as CSV.
pd.concat(
    [y_test, df_test_noisy],
    axis=1,
).to_csv(
    '../../dataset/breast-cancer-wisconsin/test_case/test_case_mlp_lower_noise.csv',
    index=False,
    header=True,
)

In [51]:
# For every explained record, overwrite the first k features named in its explanation
# with the value from row 0 of bound_standardized.csv, then save the modified test set.
# Records without an explanation are left untouched.

# Indices of the records that have an explanation. Computed once, outside the k loop,
# so widening the k range cannot append the same indices several times.
explain_index = [
    idx_record2explain
    for idx_record2explain in range(test_row_count)
    if explanations[idx_record2explain] is not None
]

# Replacement values; independent of k, so the file is read a single time.
# NOTE(review): presumably row 0 holds the per-feature lower bound in standardised
# units -- confirm against how bound_standardized.csv is produced.
bound_standardized = pd.read_csv('../../dataset/breast-cancer-wisconsin/bound_standardized.csv')

# Loop over k; for each k iterate through all explained records.
for k in range(3, 4):
    # Reload a fresh copy of the noisy test set for each k, since it is modified below.
    test_data_noise = pd.read_csv('../../dataset/breast-cancer-wisconsin/test_case/test_case_mlp_lower_noise.csv')
    for idx_record2explain in explain_index:
        explanation, infos = explanations[idx_record2explain]
        # Keys of the mapping stored at explanation[0][1], in insertion order.
        # NOTE(review): assumed to be the feature names used by the rule -- confirm.
        keys_list = list(explanation[0][1].keys())
        # The slice takes k keys, or all of them when fewer than k are present.
        for key in keys_list[:k]:
            test_data_noise.at[idx_record2explain, key] = bound_standardized.at[0, key]
    filename = '../../explain_set_lower/breast-cancer-wisconsin/MLP/lore/noise/test_case/lore_explain_'+str(k)+'.csv'
    test_data_noise.to_csv(filename, index=False)

# Persist the explained-record indices.
np.save('../../dataset/breast-cancer-wisconsin/test_case/explain_index_mlp_lower.npy', explain_index)

In [52]:
# Reload the saved array of explained-record indices from disk and display it.
arr = np.load('../../dataset/breast-cancer-wisconsin/test_case/explain_index_mlp_lower.npy')
arr

array([  0,   1,   3,   4,   5,   6,   7,  10,  11,  12,  13,  14,  15,
        16,  17,  19,  20,  21,  23,  24,  25,  26,  27,  28,  29,  30,
        31,  32,  33,  34,  35,  37,  38,  39,  40,  41,  42,  43,  44,
        45,  46,  47,  48,  50,  52,  53,  54,  55,  57,  58,  59,  60,
        61,  62,  63,  64,  65,  67,  69,  70,  71,  72,  73,  74,  75,
        76,  77,  78,  79,  80,  81,  82,  83,  85,  86,  88,  91,  92,
        93,  94,  95,  96,  97,  98, 101, 102, 103, 104, 105, 106, 108,
       109, 110, 111, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
       124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136,
       137, 138, 139, 141, 142, 143, 145, 146, 147, 149, 150, 151, 153,
       154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 166, 168, 169,
       170, 171, 172, 174, 175, 176, 177, 178, 179, 180, 181, 182, 184,
       186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 199,
       200, 201, 202, 203, 204, 206, 208, 209, 210, 211, 212, 21