In [11]:
import warnings
# NOTE: this blanket filter silences every warning raised below, including
# pandas / scikit-learn ones; narrow it if a specific warning is the target.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset and separate the feature matrix from the 'class' target.
df = pd.read_csv('dataset/early-stage-diabetes-risk-prediction/diabetes.csv')
X = df.drop('class', axis=1)
y = df['class']
feature_names = X.columns

# Hold out 30% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# The split returns frames derived from X. Take explicit copies before writing
# a column into them so the assignments below are unambiguous and do not rely
# on the (suppressed) SettingWithCopyWarning being harmless.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize the 'Age' column only: fit the scaler on the training rows and
# reuse the fitted statistics for the test rows.
transform = StandardScaler()
X_train['Age'] = transform.fit_transform(X_train[['Age']])
X_test['Age'] = transform.transform(X_test[['Age']])

# Build a noisy copy of the test set. np.random.randn(*X_test.shape) returns an
# array with the same shape as X_test, drawn from a normal distribution with
# mean 0 and standard deviation 1; it is multiplied by the noise standard
# deviation (0.2) and added to the input data.
# NOTE(review): the noise is added to every column, so this presumably expects
# all features in the CSV to be numerically encoded already -- confirm.
noise_std = 0.2
np.random.seed(0)
X_test_noisy = X_test + noise_std * np.random.randn(*X_test.shape)

In [12]:
import joblib
from sklearn.metrics import classification_report, confusion_matrix

# Restore the previously saved model from disk.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model = joblib.load('saved_model/early-stage-diabetes-risk-prediction/RF.pkl')

# Score the noise-perturbed test set: predicted labels and class probabilities.
y_pred = model.predict(X_test_noisy)
y_pred_proba = model.predict_proba(X_test_noisy)

# Report the confusion matrix followed by the per-class metrics.
confusion_matrix_model = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", confusion_matrix_model)
print(
    classification_report(
        y_test,
        y_pred,
        target_names=['Negative', 'Positive'],
    )
)

Confusion Matrix:
 [[104   4]
 [  3  60]]
              precision    recall  f1-score   support

      Benign       0.97      0.96      0.97       108
   Malignant       0.94      0.95      0.94        63

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



In [13]:
from anchor import anchor_tabular

# Display names for the two predicted classes.
class_names = ['Negative', 'Positive']

# Value labels handed to the explainer, keyed by integer (presumably the
# feature's column position -- TODO confirm against the anchor API):
# keys 1 and 2 -> Female/Male, keys 3 through 30 -> No/Yes.
# NOTE(review): key 2 carrying Female/Male and the keys running up to 30 look
# like leftovers from another dataset; verify against the columns of X_train.
categorical_names = {1: ['Female', 'Male'], 2: ['Female', 'Male']}
categorical_names.update({position: ['No', 'Yes'] for position in range(3, 31)})

# Build the tabular Anchor explainer from the training data.
explainer = anchor_tabular.AnchorTabularExplainer(
    class_names,
    feature_names,
    np.array(X_train),
    categorical_names,
)

In [14]:
# One Anchor explanation per row of the noisy test set, stored in row order so
# that explanations[i] belongs to the i-th test row.
explanations = []

# Convert to a plain 2-D array once: X_test_noisy is a DataFrame, and indexing
# a DataFrame with an integer (X_test_noisy[idx]) is a column-label lookup that
# raises KeyError instead of returning row idx.
noisy_rows = np.asarray(X_test_noisy)

# Iterate over the actual rows rather than a hard-coded count, so the loop
# follows whatever size the test split has.
for row in noisy_rows:
    # threshold=0.95 is forwarded unchanged to the explainer.
    exp = explainer.explain_instance(row, model.predict, threshold=0.95)
    explanations.append(exp)

In [15]:
# For each k in 1..30, write a copy of the test data in which, for every
# explained row, the first min(k, number of anchor features) features returned
# by the explanation are overwritten with (row 1 of `bound` for that feature) + 10.
#
# NOTE(review): these paths reference 'breast-cancer-wisconsin' and 'MLP',
# while the data and model loaded earlier in this notebook are the diabetes
# dataset and RF.pkl -- confirm the intended input/output locations.

# The inputs do not change between iterations, so read them once; every k
# starts from a fresh copy of the unmodified test data.
test_data_template = pd.read_csv('dataset/breast-cancer-wisconsin/test_data.csv')
bound = pd.read_csv('dataset/breast-cancer-wisconsin/bound.csv')

for k in range(1, 31):
    test_data = test_data_template.copy()
    # Walk the stored explanations instead of a hard-coded row count.
    for idx, exp in enumerate(explanations):
        lista = exp.features()
        # Slicing covers both cases of the former if/elif: at most k features,
        # or all of them when the explanation has k or fewer.
        for feature in lista[:k]:
            # Test-data columns are offset by one relative to the feature
            # index (presumably a leading id/label column -- TODO confirm).
            test_data.iat[idx, feature + 1] = bound.iat[1, feature] + 10
    filename = 'explain_set/breast-cancer-wisconsin/MLP/anchor/noise/anchor_explain_' + str(k) + '.csv'
    test_data.to_csv(filename, index=False)