In [7]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Columns rescaled with StandardScaler; every other column is passed through unchanged.
NUMERIC_COLS = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'ca']

# Load the dataset and separate the features from the 'target' label column.
df = pd.read_csv('dataset/heart-disease/heart-disease.csv')

X = df.drop('target', axis=1)
y = df['target']
feature_names = X.columns

# 70/30 split with a fixed seed so the test rows are the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Take explicit copies before assigning into the frames: the split results are
# selections of X, and writing columns into them would otherwise trigger
# pandas' SettingWithCopyWarning (previously only hidden by the blanket filter).
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same transform to the
# test rows so no test statistics leak into the scaling parameters.
transform = StandardScaler()
X_train[NUMERIC_COLS] = transform.fit_transform(X_train[NUMERIC_COLS])
X_test[NUMERIC_COLS] = transform.transform(X_test[NUMERIC_COLS])

# NOTE: joblib.load unpickles the file and can execute arbitrary code;
# only load model files from a trusted source.
model = joblib.load('saved_model/heart-disease/MLP.pkl')

# Predicted labels and predicted probabilities for the scaled test set.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

In [8]:
# Print a summary of the loaded frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 297 entries, 0 to 296
Data columns (total 26 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   age                                   297 non-null    int64  
 1   trestbps                              297 non-null    int64  
 2   chol                                  297 non-null    int64  
 3   thalach                               297 non-null    int64  
 4   oldpeak                               297 non-null    float64
 5   ca                                    297 non-null    int64  
 6   sex_female                            297 non-null    int64  
 7   sex_male                              297 non-null    int64  
 8   cp_asymptomatic                       297 non-null    int64  
 9   cp_atypical angina                    297 non-null    int64  
 10  cp_non-anginal pain                   297 non-null    int64  
 11  cp_typical angina  

In [9]:
from anchor import anchor_tabular
# Display names for the two predicted classes, in label order 0, 1.
class_names = ['Healthy', 'Disease']

# Feature positions 6 through 24 each get the value names 'no' / 'yes'
# (index 0 -> 'no', index 1 -> 'yes'); a fresh list is built per feature.
categorical_names = {feature_idx: ['no', 'yes'] for feature_idx in range(6, 25)}

# Build the Anchor explainer from the training data as a plain NumPy array.
train_matrix = np.array(X_train)
explainer = anchor_tabular.AnchorTabularExplainer(
    class_names,
    feature_names,
    train_matrix,
    categorical_names,
)

In [10]:
# Generate one Anchor explanation per test row and keep them all in a list,
# so the explainer only has to run once and the results can be reused.
# The number of rows comes from X_test itself instead of a hard-coded 90, so the
# loop stays correct if the train/test split ever changes.
explanations = []

for idx in range(len(X_test)):
    # The row is passed as a (1, n_features) array.
    row = X_test.iloc[idx].values.reshape(1, -1)
    exp = explainer.explain_instance(row, model.predict, threshold=0.95)
    explanations.append(exp)

In [11]:
# For every k in 1..25: take the test data, overwrite the first k anchor features
# of each row with the value found in row 1 of bound.csv for that column, and
# write the result to one CSV per k. Rows whose anchor has fewer than k features
# simply have all of their anchor features overwritten.

# Read the inputs once; each k works on its own copy of the test data because
# the frame is modified in place below.
base_test_data = pd.read_csv('dataset/heart-disease/test_data.csv')
bound = pd.read_csv('dataset/heart-disease/bound.csv')

# The anchor feature indices do not depend on k, so collect them a single time
# from the previously stored explanations.
anchor_features = [stored_exp.features() for stored_exp in explanations]
n_rows = len(anchor_features)

# Anchor-length statistics are also independent of k: compute them once.
anchor_lengths = [len(features) for features in anchor_features]
count = sum(anchor_lengths)
maxlen = max(anchor_lengths, default=0)
mean_length = count / n_rows if n_rows else 0.0

for k in range(1, 26):
    test_data = base_test_data.copy()
    for idx, lista in enumerate(anchor_features):
        # lista[:k] yields the first min(k, len(lista)) features, which covers
        # both the "longer than k" and "at most k" cases.
        for col in lista[:k]:
            test_data.iat[idx, col] = bound.iat[1, col]
    filename = 'explain_set/heart-disease/MLP/anchor/anchor_explain_'+str(k)+'.csv'
    test_data.to_csv(filename, index=False)

print("mean_length:",mean_length)
print("maxlen:",maxlen)

mean_length: 3.2555555555555555
maxlen: 6


In [12]:
# For every k in 1..25: start from the contents of keep_temp.csv, copy in the
# test-data values of the first k anchor features of each row, and write the
# result to one CSV per k. Rows whose anchor has fewer than k features have all
# of their anchor features copied.

# Read the inputs once. test_data is only read from; keep_temp is modified in
# place, so each k works on its own copy of the loaded frame.
test_data = pd.read_csv('dataset/heart-disease/test_data.csv')
base_keep_temp = pd.read_csv('dataset/heart-disease/keep_temp.csv')

# The anchor feature indices do not depend on k, so collect them a single time
# from the previously stored explanations.
anchor_features = [stored_exp.features() for stored_exp in explanations]
n_rows = len(anchor_features)

# Anchor-length statistics are also independent of k: compute them once.
anchor_lengths = [len(features) for features in anchor_features]
count = sum(anchor_lengths)
maxlen = max(anchor_lengths, default=0)
mean_length = count / n_rows if n_rows else 0.0

for k in range(1, 26):
    keep_temp = base_keep_temp.copy()
    for idx, lista in enumerate(anchor_features):
        # lista[:k] yields the first min(k, len(lista)) features, which covers
        # both the "longer than k" and "at most k" cases.
        for col in lista[:k]:
            keep_temp.iat[idx, col] = test_data.iat[idx, col]
    filename = 'explain_set/heart-disease/MLP/keep/anchor/anchor_explain_'+str(k)+'.csv'
    keep_temp.to_csv(filename, index=False)

print("mean_length:",mean_length)
print("maxlen:",maxlen)

mean_length: 3.2555555555555555
maxlen: 6
