In [None]:
import pandas as pd
import numpy as np
import pickle
import time

from collections import Counter

from copy import deepcopy

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

from skimage.metrics import structural_similarity as ssim
from skimage.metrics import mean_squared_error

# For keras dependencies
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation
from keras import backend as K

# For LRP Visuals
from deepexplain.tensorflow import DeepExplain
from keras.models import Model

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from dexplain import helpers

In [None]:
# Pick a value of k for the dataset
# NOTE(review): `k` is not read by any cell visible in this notebook; the k-NN
# classifiers below hard-code n_neighbors=3 — confirm which value is intended.
k = 1

In [None]:
# Load the trained Keras model from disk
model = load_model("NN.h5")

In [None]:
# Load data: inputs (X_*) and labels (y_*) for the train and test splits
X_train = np.load("X_train.npy")
X_test = np.load("X_test.npy")
y_train = np.load("y_train.npy")
y_test = np.load("y_test.npy")

# Load the feature activations (used below as the "k-NN*" representation)
X_train_act = np.load("X_train_act.npy")
X_test_act = np.load("X_test_act.npy")

# Load DeepLIFT contributions (used below as the "C-DeepLIFT" representation)
X_train_cont = np.load("new_X_train_deeplift.npy")
X_test_cont = np.load("new_X_test_deeplift.npy")

## Contributions Search

In [None]:
# Class predicted by the network for every test instance; reused below to
# measure how often each k-NN variant agrees with the network.
# NOTE(review): `predict_classes` is not available in recent Keras releases —
# confirm the version this notebook is pinned to.
nn_pred = model.predict_classes(X_test)

In [None]:
# Flatten every image into one row of 28 * 28 values.
n_pixels = 28 * 28
X_train_knn = X_train.reshape((len(X_train), n_pixels))
X_test_knn = X_test.reshape((len(X_test), n_pixels))

In [None]:
# Report the shape of every array used by the comparisons below.
for caption, array in (
    ("DeepLIFT-KNN:", X_train_cont),
    ("Activations:", X_train_act),
    ("Training:", X_train),
    ("Training k-NN:", X_train_knn),
    ("y_test:", y_test),
):
    print(caption, array.shape)

In [None]:
# Each entry: [display name, training representation, test representation].
techniques = [
    ["k-NN*", X_train_act, X_test_act],
    ["C-DeepLIFT", X_train_cont, X_test_cont],
]
banner = "=================================================="

for technique, train, test in techniques:
    print(banner)
    print(technique)
    print(banner)

    # Brute-force 3-NN on this representation. After the loop `kNN` stays
    # bound to the classifier fitted for the last technique in the list.
    kNN = KNeighborsClassifier(n_neighbors=3, algorithm="brute").fit(train, y_train)
    knn_predictions_test = kNN.predict(test)

    print("Accuracy:", accuracy_score(y_test, knn_predictions_test))
    print(confusion_matrix(y_test, knn_predictions_test))
    print(" ")

    # Fraction of test instances on which k-NN and the network predict the
    # same class.
    correct = 0
    for i in range(len(nn_pred)):
        correct += 1 if knn_predictions_test[i] == nn_pred[i] else 0
    print("Agreement", correct / len(nn_pred))

## Centroids

In [None]:
import numpy as np
from scipy.spatial import distance as D

In [None]:
# Training inputs and labels, read again from disk under the short names used
# by the centroid cells.
X, y = np.load('X_train.npy'), np.load('y_train.npy')

In [None]:
# Per-class containers keyed by class label: `classes` will hold the samples
# of each of the 10 classes, `centroids` the sample chosen to represent it.
classes, centroids = {}, {}

In [None]:
# Bucket the samples by label: for each class, look up the positions in `y`
# carrying that label and pull the matching rows out of `X`.
# np.argwhere yields one index row per match, so (assuming `y` is 1-D) every
# bucket keeps an extra length-1 axis right after the sample axis.
for x in range(10):
    member_rows = np.argwhere(y == x)
    classes[x] = X[member_rows]

In [None]:
# NOTE(review): these dimensions are not referenced by any visible cell.
m, ch, h, w = 6000, 1, 28, 28
center_points = []

for class_index, cluster in classes.items():
    # Per-pixel mean image of the class. The mean must run over axis 0 (the
    # sample axis) only: without an axis np.mean collapses the whole cluster
    # to a single scalar, and every distance below would be measured against
    # a flat, constant-intensity image instead of the class mean.
    center_point = np.mean(cluster, axis=0, keepdims=True)
    center_points.append(center_point)

    # Euclidean distance from every sample of the class to the mean image.
    flat_cluster = cluster.reshape(len(cluster), -1).astype(float)
    distances_arr = np.linalg.norm(flat_cluster - center_point.reshape(1, -1), axis=1)

    # The centroid is the real sample closest to the mean image.
    centroids[class_index] = cluster[np.argmin(distances_arr)]

    class_d_mean, class_d_std = distances_arr.mean(), distances_arr.std()
    print(f"= [{class_index}] - distance mean= {class_d_mean:0.4f} - distance std= {class_d_std:0.4f}")

In [None]:
# Inspect the shape of the centroid stored for class 0
centroids[0].shape

In [None]:
# Materialise the centroids as a real list (in class-insertion order);
# a bare dict view can be neither indexed nor sliced.
centroid_list = list(centroids.values())

## Get Frequency Matrix for User Study Research

In [None]:
# Single nearest training neighbour of test instance 5 according to the
# currently fitted `kNN`; its ground-truth label is used as the explanation.
distances, idxs = kNN.kneighbors(X=[X_test_cont[5]], n_neighbors=1, return_distance=True)
neighbour = idxs[0, 0]
explanation = y_train[neighbour]

In [None]:
# Does the network's prediction for test instance 5 match the label of its
# nearest neighbour? The index is written out explicitly so it matches the
# instance the explanation was computed for, rather than relying on a loop
# index left over from an earlier cell.
model.predict_classes(np.array([X_test[5]]))[0] == explanation

In [None]:
# Eight outcome buckets (keys 1-8), each collecting test-instance indices.
#   1-4: the network predicts the same class for the query and for its
#        nearest neighbour
#   5-8: the two predictions differ
# Within each group of four, in order:
#   network right & neighbour label right, network right & neighbour label wrong,
#   network wrong & neighbour label right, network wrong & neighbour label wrong
# ("right" meaning equal to the query's true digit).
freq_dict = {bucket: [] for bucket in range(1, 9)}

digits = list(range(10))

for digit in digits:

    for i in range(len(X_test)):

        # Only handle the instances of the digit currently being processed
        if y_test[i] != digit:
            continue

        # Nearest neighbour's label, plus the network's prediction for the
        # neighbour and for the query itself
        distances, idxs = kNN.kneighbors(X=[X_test_cont[i]], n_neighbors=3, return_distance=True)
        neighbour = idxs[0][0]
        exp_label = y_train[neighbour]
        exp_pred = model.predict_classes(np.array([X_train[neighbour]]))[0]
        CNN_pred = model.predict_classes(np.array([X_test[i]]))[0]

        # Build the bucket key from the three independent yes/no outcomes
        bucket = 1
        if CNN_pred != exp_pred:
            bucket += 4
        if CNN_pred != digit:
            bucket += 2
        if exp_label != digit:
            bucket += 1
        freq_dict[bucket].append(i)

In [None]:
# Number of test instances collected under each key
for key in freq_dict:
    print(key, len(freq_dict[key]))

## Get Wrong Situations for User Study

In [None]:
# Collect misclassified test instances whose three neighbours still back the
# network's (wrong) prediction:
#   1: all three neighbours share one label, the network predicts one class
#      for all three, label and prediction coincide, and that class equals
#      the network's prediction for the query
#   2: the neighbours carry exactly two distinct labels and the majority
#      label equals the network's prediction for the query
freq_dict = {1: [], 2: []}

digits = list(range(10))

for digit in digits:

    for i in range(len(X_test)):

        # Not terribly efficient code, but it'll do here
        if y_test[i] != digit:
            continue

        # Indices of the three nearest training neighbours
        neighbour_idxs = kNN.kneighbors(X=[X_test_cont[i]], n_neighbors=3, return_distance=False)[0]

        # Copies of the index array, overwritten element by element with the
        # neighbours' true labels and the network's predictions for them
        neighbour_labels = neighbour_idxs.copy()
        neighbour_preds = neighbour_idxs.copy()
        for j, train_idx in enumerate(neighbour_idxs):
            neighbour_labels[j] = y_train[train_idx]
            neighbour_preds[j] = model.predict_classes(np.array([X_train[train_idx]]))[0]

        CNN_pred = model.predict_classes(np.array([X_test[i]]))[0]
        query_label = y_test[i]

        # Both situations only concern queries the network got wrong
        if CNN_pred == query_label:
            continue

        label_counts = Counter(neighbour_labels)

        if len(label_counts) == 1:
            # Situation 1
            if (
                len(Counter(neighbour_preds)) == 1
                and neighbour_labels[0] == neighbour_preds[0]
                and neighbour_labels[0] == CNN_pred
            ):
                freq_dict[1].append(i)
        elif len(label_counts) == 2:
            # Situation 2
            if label_counts.most_common()[0][0] == CNN_pred:
                freq_dict[2].append(i)

In [None]:
# freq_dict

## Find Most NB Feature for Each Class

In [None]:
# One list per class label (0-9), filled with feature indices
nb_features = {label: [] for label in range(10)}

# Do the simplest thing possible and just log the argmax of each instance
for i, contributions in enumerate(X_train_cont):
    nb_features[y_train[i]].append(np.argmax(contributions))

In [None]:
# plt.hist(nb_features[9], bins=None)

In [None]:
# from collections import Counter
# Counter(nb_features[9])

## Examine Explanation with White Box

In [None]:
# Brute-force 3-NN fitted on the training contributions, then applied to the
# test contributions.
kNN = KNeighborsClassifier(n_neighbors=3, algorithm="brute").fit(X_train_cont, y_train)
knn_predictions_test = kNN.predict(X_test_cont)

In [None]:
# Test accuracy of the k-NN predictions against the true labels
accuracy_score(y_test, knn_predictions_test)

In [None]:
# Read the four train/test arrays from disk again.
X_train, X_test, y_train, y_test = (
    np.load(f"{name}.npy") for name in ("X_train", "X_test", "y_train", "y_test")
)

In [None]:
# for i in range(len(X_test)):
#     if y_test[i] != model.predict_classes(np.array([X_test[i]])):
#         print(i)

In [None]:
# Pull list of errors to examine: display the test-instance indices collected
# under each key of `freq_dict`

freq_dict

In [None]:
test_instance = 1075

# Find Neighbours
idxs = kNN.kneighbors(X=[X_test_cont[test_instance]], n_neighbors=3, return_distance=False)
neighbours = idxs[0]

print("Query Label:", y_test[test_instance])
print("Prediction:", model.predict_classes(np.array([X_test[test_instance]]))[0])
print(" ")
print("Neighbors:")
for neighbour_idx in neighbours:
    print(y_train[neighbour_idx])

# Query image first, followed by its three nearest training neighbours
f, axarr = plt.subplots(1, 4)
panels = [X_test[test_instance]] + [X_train[neighbour_idx] for neighbour_idx in neighbours[:3]]
for axis, panel in zip(axarr, panels):
    axis.imshow(np.squeeze(panel))
    axis.axis('off')

In [None]:
# Test instance currently under examination
test_inst = X_test[test_instance]
# Centroid of the test instance's own class, looked up by its true label
# rather than by a loop variable left over from an earlier cell
test_inst_centroid = centroids[int(y_test[test_instance])][0]
# Centroid of the class of the nearest neighbour
neighbour_centroid = centroids[int(y_train[neighbours[0]])][0]
# Difference between test instance and neighbour centroid
diff = test_inst - neighbour_centroid
# Difference between test instance and test instance centroid
diff_centroid = test_inst - test_inst_centroid

# visualise test instance centroid
# NOTE(review): this scatters the first slice of the centroid against its
# second slice on the first panel of the previous figure — confirm that this
# is the intended visualisation.
axarr[0].scatter(test_inst_centroid[0], test_inst_centroid[1], c='r', marker='x')

In [None]:
# SSIM between the test instance and each of its neighbours.
# NOTE(review): no centroid is involved here and both lists receive identical
# values — confirm what each list is meant to hold.
print(y_test[test_instance])
query_image = np.squeeze(X_test[test_instance])
ssim_test, ssim_neighbours = [], []
for i in range(len(neighbours)):
    candidate_image = np.squeeze(X_train[neighbours[i]])
    ssim_test.append(ssim(query_image, candidate_image))
    ssim_neighbours.append(ssim(query_image, candidate_image))

In [None]:
import skimage.measure as measure

# MSE function

def mse(imageA, imageB):
    """Return the mean squared error between two images.

    The squared differences (computed in floating point) are summed over all
    elements and divided by the number of pixels of ``imageA``, i.e.
    ``imageA.shape[0] * imageA.shape[1]``.

    Parameters
    ----------
    imageA : numpy.ndarray
        Reference image with at least two axes.
    imageB : numpy.ndarray
        Image to compare; must be broadcast-compatible with ``imageA``.

    Returns
    -------
    float
        0.0 for identical images, growing as the images differ.
    """
    diff = imageA.astype("float") - imageB.astype("float")
    # Normalise by imageA's own height * width; mixing in imageB's shape gave
    # a wrong divisor (or an IndexError) whenever the two ranks differed.
    return np.sum(diff ** 2) / float(imageA.shape[0] * imageA.shape[1])

# SSIM function
def compare_image(imageA, imageA_labels, centroid):
    """Compare an image with the centroid stored for its label.

    Parameters
    ----------
    imageA : numpy.ndarray
        Image to compare.
    imageA_labels : hashable
        Key used to look the reference image up in ``centroid``.
    centroid : mapping
        Container indexed by label that returns the reference image.

    Returns
    -------
    float
        Structural similarity between ``imageA`` and the reference image;
        the value is also printed.
    """
    imageB = centroid[imageA_labels]
    # NOTE(review): the MSE is computed but only the SSIM is reported.
    m = mse(imageA, imageB)
    # Use the `structural_similarity` alias imported by this notebook;
    # `skimage.measure.compare_ssim` is gone from current scikit-image.
    # Length-1 axes are squeezed away so both arguments are plain 2-D images.
    s = ssim(np.squeeze(imageA), np.squeeze(imageB))

    print('ssim: ', s)
    return s

In [None]:
test_instance = 1000

# Find Neighbours
idxs = kNN.kneighbors(X=[X_test_cont[test_instance]], n_neighbors=3, return_distance=False)
neighbours = idxs[0]

print("Query Label:", y_test[test_instance])
print("Prediction:", model.predict_classes(np.array([X_test[test_instance]]))[0])
print(" ")
print("Neighbors:")
for neighbour_idx in neighbours:
    print(y_train[neighbour_idx])

# Panels: the query first, then its neighbours, each paired with its true
# label. Images are squeezed so length-1 axes do not reach imshow/MSE/SSIM.
panel_titles = ['Test Instance', 'NN 1', 'NN 2', 'NN 3']
panel_images = [np.squeeze(X_test[test_instance])] + [np.squeeze(X_train[idx]) for idx in neighbours]
panel_labels = [y_test[test_instance]] + [y_train[idx] for idx in neighbours]

# Plain view of the query and its neighbours
f, axarr = plt.subplots(1, 4)
for axis, image in zip(axarr, panel_images):
    axis.imshow(image)
    axis.axis('off')

# Score every panel against the centroid of its OWN class. The images
# themselves (not their labels) are compared, and each centroid is looked up
# by the panel's label instead of a loop variable left by an earlier cell.
mse_scores, ssim_scores = [], []
for image, label in zip(panel_images, panel_labels):
    class_centroid = np.squeeze(centroids[int(label)])
    mse_scores.append(mean_squared_error(image, class_centroid))
    ssim_scores.append(ssim(image, class_centroid))

f, ax = plt.subplots(1, 4, figsize=(10, 4),
                     sharex=True, sharey=True)

for axis, title, image, mse_value, ssim_value in zip(ax, panel_titles, panel_images, mse_scores, ssim_scores):
    axis.imshow(image)
    axis.set_title(title)
    axis.set_xlabel(f'MSE: {mse_value:.4f}, SSIM: {ssim_value:.4f}')
    # Hide the ticks only: axis('off') would also hide the x-label that
    # carries the scores.
    axis.set_xticks([])
    axis.set_yticks([])


#plt.savefig("Materials/Errors/KMNIST E 24.pdf") 


In [None]:
from skimage import metrics

from skimage.metrics import structural_similarity as ssim

test_instance = 1693

# To use SSIM (or any general similarity function) inside the k-NN algorithm,
# write a function that takes two inputs and returns the similarity index you
# are aiming for, then pass that function as the distance `metric` of
# sklearn's KNeighborsClassifier.

idxs = kNN.kneighbors(X=[X_test_cont[test_instance]], n_neighbors=3, return_distance=False)
neighbours = idxs[0]

test_image = X_test[test_instance]
test_cont = X_test_cont[test_instance]
image_label = y_test[test_instance]
test_image_label = y_test[test_instance]

# NOTE(review): these rows come from the contribution array, not from
# X_train — confirm that contributions (rather than the neighbour images) are
# what should be compared against the image centroids.
neighbour_images = X_train_cont[neighbours]
neighbour_labels = y_train[neighbours]


dist = compare_image(test_image, image_label, centroids)
print('distance test instance {} to own centroid {} : {}'.format(0, 0, dist))

# NOTE(review): a test-image vs. first-neighbour comparison used to follow
# here, but it referenced the undefined name `neighbor_images`, passed an
# image where compare_image expects a label-indexed container, and its result
# was overwritten without being used, so it has been dropped. Add an explicit
# comparison here if one is wanted.

# Compare each of the three neighbours with the centroid of its own class
for position in range(3):
    neighbour_label, neighbour_image = neighbour_labels[position], neighbour_images[position]
    dist = compare_image(neighbour_image, neighbour_label, centroids)
    print('distance neighbour instance {} to neighbour centroid {} : {}'.format(position, position, dist))