In [2]:
import tensorflow as tf
import numpy as np

In [3]:
# Load the model
class SequentialNN(tf.keras.models.Sequential):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def predict(self, x):
        pred_probs = super().predict(x)
        rounded_probs = tf.math.round(pred_probs)
        predictions = tf.cast(rounded_probs, tf.int32).numpy()
        return predictions.flatten()
    

model = tf.keras.models.load_model('model.h5', custom_objects={'SequentialNN': SequentialNN})

# load train and test data
X_train = np.load('train_data.npy')
y_train = np.load('train_labels.npy', allow_pickle=True)
X_test = np.load('test_data.npy')
y_test = np.load('test_labels.npy', allow_pickle=True)

# X_train = X_train.astype('float32')
# y_train = np.array([label[0] for label in y_train]).astype('int32')

# X_test = X_test.astype('float32')
# y_test = np.array([label[0] for label in y_test]).astype('int32')


In [4]:
# attacking the model

X_test_final = np.load('test_data.npy')
y_test_final = np.load('test_labels.npy', allow_pickle=True)


model.evaluate(X_test_final, y_test_final, verbose=2)

1/1 - 0s - loss: 1.4904 - accuracy: 0.9231 - 86ms/epoch - 86ms/step


[1.490388035774231, 0.9230769276618958]

In [5]:
# change second column of X_test_final to 0
X_test_final[:, 1] = 100

In [6]:
model.evaluate(X_test_final, y_test_final, verbose=2)

1/1 - 0s - loss: 313.5997 - accuracy: 0.5000 - 19ms/epoch - 19ms/step


[313.5997314453125, 0.5]

In [7]:
X_train.shape

(101, 5, 6)

In [13]:
from trustee import ClassificationTrustee
from sklearn.metrics import classification_report

clf = model
y_pred = clf.predict(X_test)

trustee = ClassificationTrustee(expert=clf)
trustee.fit(X_train, y_train, num_iter=50, num_stability_iter=10, samples_size=0.3, verbose=True)
dt, pruned_dt, agreement, reward = trustee.explain()
dt_y_pred = dt.predict(X_test)
dt_y_pred = pruned_dt.predict(X_test)

print("Model explanation global fidelity report:")
print(classification_report(y_pred, dt_y_pred))
print("Model explanation score report:")
print(classification_report(y_test, dt_y_pred))

Initializing training dataset using <__main__.SequentialNN object at 0x302c140d0> as expert model


ValueError: Must pass 2-d input. shape=(101, 5, 6)

In [5]:
# from sklearn.metrics import roc_curve, roc_curve, auc
# import matplotlib.pyplot as plt

# # Compute macro-average ROC curve and ROC area

# # First aggregate all false positive rates
# # all_fpr = [ np.array([0,0,1]), np.array([0,1,0]), np.array([1,0,0]) ]
# all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

# # Then interpolate all ROC curves at this points
# mean_tpr = np.zeros_like(all_fpr)
# for i in range(n_classes):
#     mean_tpr += interp(all_fpr, fpr[i], tpr[i])

# # Finally average it and compute AUC
# mean_tpr /= n_classes

# fpr["macro"] = all_fpr
# tpr["macro"] = mean_tpr
# roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

# # Plot all ROC curves
# plt.figure()
# plt.plot(fpr["micro"], tpr["micro"],
#      label='micro-average ROC curve (area = {0:0.2f})'
#            ''.format(roc_auc["micro"]),
#      color='deeppink', linestyle=':', linewidth=4)

# plt.plot(fpr["macro"], tpr["macro"],
#      label='macro-average ROC curve (area = {0:0.2f})'
#            ''.format(roc_auc["macro"]),
#      color='navy', linestyle=':', linewidth=4)

# colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
# for i, color in zip(range(n_classes), colors):
#     plt.plot(fpr[i], tpr[i], color=color, lw=lw,
#          label='ROC curve of class {0} (area = {1:0.2f})'
#          ''.format(i, roc_auc[i]))

# plt.plot([0, 1], [0, 1], 'k--', lw=lw)
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Some extension of Receiver operating characteristic to multi-class')
# plt.legend(loc="lower right")
# plt.show()

In [6]:
# predictions_no_attack = model.predict(X_test)
# predictions_attack = model.predict(X_test_final)

# from sklearn.metrics import roc_curve, roc_curve, auc
# import matplotlib.pyplot as plt

# fprn, tprn, thresholdsn = roc_curve(y_test, predictions_no_attack)
# fpra, tpra, thresholdsa = roc_curve(y_test_final, predictions_attack)

# roc_auc_c = auc(fprn, tprn)
# roc_auc_a = auc(fpra, tpra)

# fontSize = 14
# labelSize = 17
# # Plot the ROC curve
# plt.figure(figsize=(8, 6))
# plt.plot(fprn, tprn, color='darkorange', lw=2, label=f'No attack (AUC={roc_auc_c:.2f})', \
#         marker='o')
# plt.plot(fpra, tpra, color='red', lw=2, label=f'attack (AUC={roc_auc_a:.2f})', \
#         marker='*')
# plt.plot([0,1], [0,1], color='navy', lw=2, linestyle='--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.01])
# plt.tick_params(axis='both', which='major', labelsize=labelSize)
# plt.xlabel('False Positive Rate', fontsize=fontSize)
# plt.ylabel('True Positive Rate', fontsize=fontSize)
# plt.legend(loc='lower right', fontsize=fontSize-1)
# plt.grid(True)
# plt.show()