In [21]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the heart-disease table and separate the label column from the features.
df = pd.read_csv('dataset/heart-disease/heart-disease.csv')
X = df.drop(columns='target')
y = df['target']
feature_names = X.columns

# Hold out 30% of the rows for testing; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize only the listed columns. The scaler is fitted on the training
# split and then applied, unchanged, to the test split.
scaled_columns = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'ca']
transform = StandardScaler()
X_train[scaled_columns] = transform.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = transform.transform(X_test[scaled_columns])

# Perturbed version of the test features: Gaussian noise scaled by noise_std
# is added to every column of X_test. Seeding the global NumPy RNG fixes the draw.
noise_std = 0.2
np.random.seed(0)
X_test_noisy = X_test + noise_std * np.random.randn(*X_test.shape)

In [22]:
import joblib
from sklearn.metrics import classification_report, confusion_matrix

# Restore the previously saved classifier from disk.
# NOTE(review): joblib.load unpickles the file, so only load model files that
# come from a trusted source.
model = joblib.load('saved_model/heart-disease/MLP.pkl')

# Hard labels and class probabilities for the noise-perturbed test features.
y_pred = model.predict(X_test_noisy)
y_pred_proba = model.predict_proba(X_test_noisy)

# Compare the predictions against the true test labels.
confusion_matrix_model = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion_matrix_model)
print(classification_report(y_test, y_pred, target_names=['Healthy', 'Disease']))

Confusion Matrix:
 [[38 11]
 [ 8 33]]
              precision    recall  f1-score   support

     Healthy       0.83      0.78      0.80        49
     Disease       0.75      0.80      0.78        41

    accuracy                           0.79        90
   macro avg       0.79      0.79      0.79        90
weighted avg       0.79      0.79      0.79        90



In [23]:
import lime
from lime import lime_tabular

# Build the LIME tabular explainer from the training features, handed over as
# a plain NumPy array, with the original column names and the two class labels.
# NOTE(review): no random_state is passed here — confirm whether LIME's
# sampling should be seeded explicitly rather than left to ambient RNG state.
explainer = lime_tabular.LimeTabularExplainer(
    np.array(X_train),
    mode='classification',
    feature_names=feature_names,
    class_names=['Healthy', 'Disease'],
)

In [24]:
# Generate one LIME explanation per row of the noisy test set and keep them in
# row order, so explanations[i] corresponds to X_test_noisy.iloc[i].
explanations = []

# The row count is taken from the data instead of a hard-coded 90, so a
# different split size neither drops rows nor runs past the end.
for idx in range(len(X_test_noisy)):
    exp = explainer.explain_instance(
        # LIME documents data_row as a 1-D NumPy array. Passing the pandas
        # Series directly makes LIME's integer indexing of the row rely on
        # pandas' deprecated positional fallback for label-indexed Series.
        data_row=X_test_noisy.iloc[idx].to_numpy(),
        predict_fn=model.predict_proba,
        num_features=25,  # upper limit on features reported per explanation
    )
    explanations.append(exp)

In [25]:
# For every k in 1..25, write a copy of the test data in which, for each row,
# the first k features listed by that row's LIME explanation are overwritten
# with the value found in row 1 of the bound table (same column position).
# NOTE(review): assumes test_data.csv has one row per explanation and the same
# column order as the features LIME was given — confirm against how it was written.

# Both tables are loop-invariant, so read them once instead of once per k.
test_data_original = pd.read_csv('dataset/heart-disease/test_data.csv')
bound = pd.read_csv('dataset/heart-disease/bound.csv')

# Per row: the column positions in the order the explanation lists them, taken
# from the first label in the explanation's map. This does not depend on k,
# so it is extracted once rather than inside the k loop.
ranked_columns = [
    [feature_pos for feature_pos, _weight in next(iter(exp.as_map().values()))]
    for exp in explanations
]

for k in range(1, 26):
    # Every k starts from the unmodified test data.
    test_data = test_data_original.copy()
    for idx, columns in enumerate(ranked_columns):
        # Slicing (instead of indexing positions 0..k-1) tolerates an
        # explanation that reports fewer than k features: all of its features
        # are masked and no IndexError is raised.
        for col in columns[:k]:
            test_data.iat[idx, col] = bound.iat[1, col]
    filename = 'explain_set/heart-disease/MLP/lime/noise/lime_explain_' + str(k) + '.csv'
    test_data.to_csv(filename, index=False)