In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np

In [2]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras import optimizers
import numpy as np
from keras.layers.core import Lambda
from keras import backend as K
from keras import regularizers

Using TensorFlow backend.


In [3]:
class MNIST_vgg:
    def __init__(self,train=True):
        self.num_classes = 10
        self.weight_decay = 0.0005
        self.x_shape = [32,32,3]

        self.model = self.build_model()
        if train:
            self.model = self.train(self.model)
        else:
            self.model.load_weights('MNIST_vgg.h5')


    def build_model(self):
        # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.

        model = Sequential()
        weight_decay = self.weight_decay

        model.add(Conv2D(64, (3, 3), padding='same',
                         input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.3))

        model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))


        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))


        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))

        model.add(Flatten())
        model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))
        return model


    def normalize(self,X_train,X_test):
        #this function normalize inputs for zero mean and unit variance
        # it is used when training a model.
        # Input: training set and test set
        # Output: normalized training set and test set according to the trianing set statistics.
        mean = np.mean(X_train,axis=(0,1,2,3))
        std = np.std(X_train, axis=(0, 1, 2, 3))
        X_train = (X_train-mean)/(std+1e-7)
        X_test = (X_test-mean)/(std+1e-7)
        return X_train, X_test

    def normalize_production(self,x):
        #this function is used to normalize instances in production according to saved training set statistics
        # Input: X - a training set
        # Output X - a normalized training set according to normalization constants.

        #these values produced during first training and are general for the standard cifar10 training set normalization
        mean = 120.707
        std = 64.15
        return (x-mean)/(std+1e-7)

    def predict(self,x,normalize=True,batch_size=50):
        if normalize:
            x = self.normalize_production(x)
        return self.model.predict(x,batch_size)

    def train(self,model):

        #training parameters
        batch_size = 128
        maxepoches = 25
        learning_rate = 0.1
        lr_decay = 1e-6
        lr_drop = 20
        # The data, shuffled and split between train and test sets:
        x_train,y_train,x_test,y_test = mnist_train_RGB_x,M_train_y,mnist_test_RGB_x,M_test_y
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)

        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        def lr_scheduler(epoch):
            return learning_rate * (0.5 ** (epoch // lr_drop))
        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

        #data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)



        #optimization details
        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])


        # training process in a for loop with learning rate drop every 25 epoches.

        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=x_train.shape[0] // batch_size,
                            epochs=maxepoches,
                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
        model.save_weights('MNIST_vgg.h5')
        return model

In [4]:
class cifar10vgg:
    def __init__(self,train=True):
        self.num_classes = 10
        self.weight_decay = 0.0005
        self.x_shape = [32,32,3]

        self.model = self.build_model()
        if train:
            self.model = self.train(self.model)
        else:
            self.model.load_weights('cifar10vgg.h5')


    def build_model(self):
        # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.

        model = Sequential()
        weight_decay = self.weight_decay

        model.add(Conv2D(64, (3, 3), padding='same',
                         input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.3))

        model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))


        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))


        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.4))

        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))

        model.add(Flatten())
        model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))
        return model


    def normalize(self,X_train,X_test):
        #this function normalize inputs for zero mean and unit variance
        # it is used when training a model.
        # Input: training set and test set
        # Output: normalized training set and test set according to the trianing set statistics.
        mean = np.mean(X_train,axis=(0,1,2,3))
        std = np.std(X_train, axis=(0, 1, 2, 3))
        X_train = (X_train-mean)/(std+1e-7)
        X_test = (X_test-mean)/(std+1e-7)
        return X_train, X_test

    def normalize_production(self,x):
        #this function is used to normalize instances in production according to saved training set statistics
        # Input: X - a training set
        # Output X - a normalized training set according to normalization constants.

        #these values produced during first training and are general for the standard cifar10 training set normalization
        mean = 120.707
        std = 64.15
        return (x-mean)/(std+1e-7)

    def predict(self,x,normalize=True,batch_size=50):
        if normalize:
            x = self.normalize_production(x)
        return self.model.predict(x,batch_size)

    def train(self,model):

        #training parameters
        batch_size = 128
        maxepoches = 250
        learning_rate = 0.1
        lr_decay = 1e-6
        lr_drop = 20
        # The data, shuffled and split between train and test sets:
        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train, x_test = self.normalize(x_train, x_test)

        y_train = keras.utils.to_categorical(y_train, self.num_classes)
        y_test = keras.utils.to_categorical(y_test, self.num_classes)

        def lr_scheduler(epoch):
            return learning_rate * (0.5 ** (epoch // lr_drop))
        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)

        #data augmentation
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images
        # (std, mean, and principal components if ZCA whitening is applied).
        datagen.fit(x_train)



        #optimization details
        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])


        # training process in a for loop with learning rate drop every 25 epoches.

        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=x_train.shape[0] // batch_size,
                            epochs=maxepoches,
                            validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)
        model.save_weights('cifar10vgg.h5')
        return model

In [5]:
mnist = keras.datasets.mnist
(mnist_train_x, mnist_train_y), (mnist_test_x, mnist_test_y)\
    = mnist.load_data()
def MNIST_To_CIFAR_FORM(mnist_train_x, mnist_train_y,mnist_test_x, mnist_test_y):
    """
    Change the one-channel to RBG-channel on mnist_train_x and mnist_test_x
    Change the shape of mnist_train_y and mnist_test_y from (length) to (length,1)
    ---------------------------------------
    inputs:
    mnist_train_x, mnist_train_y,mnist_test_x, mnist_test_y which is all multi-dimension array
    It is recommended to use the following way to import the data
    ========================== codes ==========================
    mnist = keras.datasets.mnist
    (mnist_train_x, mnist_train_y), (mnist_test_x, mnist_test_y)\
    = mnist.load_data()
    ========================== codes ==========================
    outputs:
    mnist_train_RGB_x, M_train_y, mnist_test_RGB_x, M_test_y 
    """
    from skimage import exposure
    import imutils
    B= []
    for i in range(len(mnist_train_x)):
        A = mnist_train_x[i]
        A = exposure.rescale_intensity(A, out_range=(0, 255))
        A = imutils.resize(A, width=32)
        B.append(A)
    B = np.array(B)

    mnist_train_RGB_x = np.repeat(B[:,:, :, np.newaxis], 3, axis=3)
    B= []
    for i in range(len(mnist_test_x)):
        A = mnist_test_x[i]
        A = exposure.rescale_intensity(A, out_range=(0, 255))
        A = imutils.resize(A, width=32)
        B.append(A)
    B = np.array(B)

    mnist_test_RGB_x = np.repeat(B[:,:, :, np.newaxis], 3, axis=3)
    M_train_y = np.array([[mnist_train_y[i]] for i in range(len(mnist_train_y))])
    M_test_y = np.array([[mnist_test_y[i]] for i in range(len(mnist_test_y))])
    return mnist_train_RGB_x, M_train_y, mnist_test_RGB_x, M_test_y

In [6]:
fp = open("MNIST-VGG-1.pkl","rb+")
M_VGG_Model1 = pickle.load(fp, encoding='bytes')
fp = open("MNIST-VGG-2.pkl","rb+")
M_VGG_Model2 = pickle.load(fp, encoding='bytes')
fp = open("MNIST-VGG-3.pkl","rb+")
M_VGG_Model3 = pickle.load(fp, encoding='bytes')
fp = open("CIFAR-VGG-1.pkl","rb+")
C_VGG_Model1 = pickle.load(fp, encoding='bytes')
fp = open("CIFAR-VGG-2.pkl","rb+")
C_VGG_Model2 = pickle.load(fp, encoding='bytes')
fp = open("CIFAR-VGG-3.pkl","rb+")
C_VGG_Model3 = pickle.load(fp, encoding='bytes')

W1025 15:06:23.552950 19100 deprecation_wrapper.py:119] From c:\users\tiany\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1025 15:06:23.566946 19100 deprecation_wrapper.py:119] From c:\users\tiany\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W1025 15:06:23.585863 19100 deprecation_wrapper.py:119] From c:\users\tiany\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:245: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W1025 15:06:23.586894 19100 deprecation_wrapper.py:119] From c:\users\tiany\appdata\local\programs\python\python37\lib\site-packages\keras\backend\tensorflow_backend.py:174: The name tf.get_default_session is depre

In [7]:
mnist_train_RGB_x, M_train_y, mnist_test_RGB_x, M_test_y = MNIST_To_CIFAR_FORM(mnist_train_x, mnist_train_y,mnist_test_x, mnist_test_y)
(C_x_train, C_y_train), (C_x_test, C_y_test) = cifar10.load_data()

In [8]:
def flatten(arr):
    shape = arr.shape
    flatten = arr.reshape(shape[0],shape[1]*shape[2]*shape[3])
    return flatten

In [9]:
mnist_train_RGB_x_flatten = flatten(mnist_train_RGB_x)
mnist_test_RGB_x_flatten = flatten(mnist_test_RGB_x)
C_x_train_flatten = flatten(C_x_train)
C_x_test_flatten = flatten(C_x_test)

In [10]:
from sklearn.metrics.pairwise import cosine_similarity
def get_the_orignal_index_after_ranking(arr, isReverse = True):
    """
    This function return the orignal index of the after the array is sorted
    inputs:
    arr || one dimension list or ndarray
    isReverse || boolean, if it is "True" the rank is decending; if it is "False" the rank is ascending 
    outputs:
    A || an arr including the orignal index before ranking
    ========================= examples =========================
    For example, arr = [4,7,2,9]
    we have the mapping relationship:
    index    value
      0        4
      1        7
      2        2
      3        9
    After sorting, say decendingly, we have:
    orignal_index     value
      3                 9
      1                 7
      0                 4
      2                 2    
    the result is for this function is [3,1,0,2].
    """
    import operator
    similarity_dict = dict(zip(list(range(len(arr))),arr))
    sorted_similarity_dict = sorted(similarity_dict.items(), reverse=isReverse, key=operator.itemgetter(1))
    A = [sorted_similarity_dict[i][0] for i in range(len(arr))]
    return A

In [19]:
def find_idx(arr, target):
    ans = []
    for i in range(len(arr)):
        if arr[i] == target:
            ans.append(i)
    return ans
def get_submax(arr):
    arr = np.array(arr)
    MAX = np.max(arr)
    idx = find_idx(arr, MAX)
    arr_without_max = np.delete(arr,idx)
    return np.max(arr_without_max)
def find_statistics(Prob_Mat):
    Prob_diff = []
    MAX_Prob_Mat = []
    MAX_Prob_Mat_idx = []
    subMAX_Prob_Mat = []
    subMAX_Prob_Mat_idx = []
    for i in range(len(Prob_Mat)):
        MAX = np.max(Prob_Mat[i])
        MAX_idx = find_idx(Prob_Mat[i], MAX)[0]
        subMAX = get_submax(Prob_Mat[i])
        subMAX_idx = find_idx(Prob_Mat[i], subMAX)[0]
        prob_difference = MAX - subMAX
        Prob_diff.append(prob_difference)
        MAX_Prob_Mat.append(MAX)
        subMAX_Prob_Mat.append(subMAX)
        MAX_Prob_Mat_idx.append(MAX_idx)
        subMAX_Prob_Mat_idx.append(subMAX_idx)
    return Prob_diff,MAX_Prob_Mat,MAX_Prob_Mat_idx,subMAX_Prob_Mat,subMAX_Prob_Mat_idx
def separate_one_class(target_class_label, x_train, y_train, x_test, y_test):
    with_train_idx = find_idx(y_train, target_class_label)
    with_test_idx = find_idx(y_test, target_class_label)
    without_train_idx = list(set(range(len(y_train))).difference(set(with_train_idx)))
    without_test_idx = list(set(range(len(y_test))).difference(set(with_test_idx)))
    with_train = x_train[with_train_idx]
    with_test = x_test[with_test_idx]
    without_train = x_train[without_train_idx]
    without_test = x_test[without_test_idx]
    with_train_y = y_train[with_train_idx]
    with_test_y = y_test[with_test_idx]
    without_train_y = y_train[without_train_idx]
    without_test_y = y_test[without_test_idx]
    return with_train, with_train_y, without_train, without_train_y, with_test, with_test_y, without_test, without_test_y
def minkowski_distance(x,y,n):
    if np.isinf(n):
        if n>0:
            return np.max(np.abs(x-y))
        else:
            return np.min(np.abs(x-y))
    else:
        return np.power(np.sum(np.power(np.abs(x-y),n)),1/n)
def minkowski_similarity(x,Y,n):
    arr = []
    for y in Y:
        arr.append(minkowski_distance(x,y,n))
    return np.array(arr)

def get_KNN_stats(k,testarr_one_sample, testarr_waiting_to_compare, 
                  testarr_waiting_to_compare_label, Model, similarity_method = 'cosine_similarity', minkowski_power = 2):
    """
    Inputs Example:
    k = 50
    testarr_one_sample = [mnist_train_RGB_x[0]]
    testarr_waiting_to_compare = [C_x_train[i] for i in range(5000)]
    testarr_waiting_to_compare_label = C_y_train[:5000].reshape(5000)
    Model = C_VGG_Model1.model
    
    Inputs:
    k: int, the number of the nearest neighbour
    testarr_one_sample: multi-dimensional ndarray, shape = (1,num_pixel_x,num_pixel_y,num_channel)
    testarr_waiting_to_compare_label: multi-dimensional ndarray, shape = (num_neighbour_candidate,num_pixel_x,num_pixel_y,num_channel)
    testarr_waiting_to_compare_label: one-dimensional ndarray, shape = (num_neighbour_candidate,)
    Model: keras backend model
    similarity_method: String, 'cosine_similarity', 'minkowski_similarity'. Default = 'cosine_similarity'
    minkowski_power: int, the p-value in the minkowski_distance. Only useful when similarity_method = 'minkowski_similarity'
    
    Outputs:
    similarity: one-dimensional ndarray, shape = (num_neighbour_candidate,)
    K_nearest_neighbour_orignal_label: one-dimensional ndarray, shape = (k,)
    K_nearest_neighbour: multi-dimensional ndarray, shape = (k,num_pixel_x,num_pixel_y,num_channel)
    KNN_oringal_class: dictionary, counts of the orignal class
    max_ratio_KNN_from_one_class: float, the max of the ratio of KNN are from one class
    """
    from keras import backend as K
    testarr_waiting_to_compare = np.array(testarr_waiting_to_compare)
    inp = Model.model.input                                           # input placeholder
    outputs = Model.model.layers[58].output          # all layer outputs
    functors = K.function([inp, K.learning_phase()], [outputs])   # evaluation functions

    # Testing
    test1 = testarr_one_sample
    layer_outs_one_sample = functors([test1, 0.])
    layer_outs_one_sample = np.array(layer_outs_one_sample)[0]

    test2 = testarr_waiting_to_compare
    layer_outs_waiting_to_compare = functors([test2, 0.])
    layer_outs_waiting_to_compare = np.array(layer_outs_waiting_to_compare)[0]
    if similarity_method == 'cosine_similarity':
        from sklearn.metrics.pairwise import cosine_similarity
        similarity = cosine_similarity(layer_outs_one_sample, layer_outs_waiting_to_compare)
        similarity = np.array(similarity[0])
        K_nearest_neighbour_orignal_label = get_the_orignal_index_after_ranking(similarity)[:k]
    elif similarity_method == 'minkowski_similarity':
        similarity = minkowski_similarity(layer_outs_one_sample, layer_outs_waiting_to_compare, minkowski_power)
        K_nearest_neighbour_orignal_label = get_the_orignal_index_after_ranking(similarity, isReverse = False)[:k]
    else:
        raise Exception("invalid similarity method")
    
    K_nearest_neighbour_orignal_label = np.array(K_nearest_neighbour_orignal_label)
    K_nearest_neighbour = testarr_waiting_to_compare[K_nearest_neighbour_orignal_label]
    from collections import Counter
    KNN_oringal_class = Counter(testarr_waiting_to_compare_label[K_nearest_neighbour_orignal_label])
    max_ratio_KNN_from_one_class = max(KNN_oringal_class.values())/k
    return similarity, K_nearest_neighbour_orignal_label, K_nearest_neighbour, KNN_oringal_class, max_ratio_KNN_from_one_class

In [12]:
def show(columns,rows,arr):
    w=10
    h=10
    fig=plt.figure(figsize=(8, 8))
    for i in range(1, columns*rows +1):
        img = arr[i-1]
        fig.add_subplot(rows, columns, i)
        plt.imshow(img)
    plt.show()
    return

In [13]:
def get_KNN_in_pixel_representation(testarr_one_sample, testarr_waiting_to_compare, testarr_waiting_to_compare_label, similarity_method = 'cosine_similarity', minkowski_power = 2):
    testarr_one_sample = np.array(testarr_one_sample)
    testarr_waiting_to_compare = np.array(testarr_waiting_to_compare)
    testarr_one_sample_flatten = flatten(testarr_one_sample)
    testarr_waiting_to_compare_flatten = flatten(testarr_waiting_to_compare)
    if similarity_method == 'cosine_similarity':
        from sklearn.metrics.pairwise import cosine_similarity
        similarity = cosine_similarity(testarr_one_sample_flatten, testarr_waiting_to_compare_flatten)
        similarity = np.array(similarity[0])
        K_nearest_neighbour_orignal_label = get_the_orignal_index_after_ranking(similarity)[:k]
    elif similarity_method == 'minkowski_similarity':
        similarity = minkowski_similarity(testarr_one_sample_flatten, testarr_waiting_to_compare_flatten, minkowski_power)
        K_nearest_neighbour_orignal_label = get_the_orignal_index_after_ranking(similarity, isReverse= False)[:k]
    else:
        raise Exception("invalid similarity method")
    
    K_nearest_neighbour_orignal_label = np.array(K_nearest_neighbour_orignal_label)
    K_nearest_neighbour = testarr_waiting_to_compare_flatten[K_nearest_neighbour_orignal_label]
    from collections import Counter
    KNN_oringal_class = Counter(testarr_waiting_to_compare_label[K_nearest_neighbour_orignal_label])
    max_ratio_KNN_from_one_class = max(KNN_oringal_class.values())/k
    import operator
    max_KNN_class_label = max(KNN_oringal_class.items(), key=operator.itemgetter(1))[0]
    return similarity, K_nearest_neighbour_orignal_label, K_nearest_neighbour, KNN_oringal_class, max_ratio_KNN_from_one_class, max_KNN_class_label

In [24]:
# Pixel representation
k = 50
testarr_one_sample_1 = [C_x_train[0]]
testarr_waiting_to_compare_1 = [mnist_train_RGB_x[i] for i in range(5000)]
testarr_waiting_to_compare_label_1 = M_train_y[:5000].reshape(5000)

In [25]:
result_l2_1 = get_KNN_in_pixel_representation(testarr_one_sample_1, testarr_waiting_to_compare_1, testarr_waiting_to_compare_label_1, similarity_method = 'minkowski_similarity', minkowski_power = 2)

In [26]:
result_l2_1[3]

Counter({7: 8, 0: 10, 9: 12, 2: 4, 5: 3, 3: 3, 4: 9, 8: 1})

In [27]:
sorted(result_l2_1[0],reverse = True)

[573.7159575957427,
 573.4439815709987,
 573.4413657907842,
 573.1378542724254,
 572.6936353758439,
 572.6360100447753,
 572.5146286340638,
 572.167807552994,
 572.1503298959112,
 572.1127511251607,
 572.0742958742335,
 572.067303732699,
 571.9230717500387,
 571.7123402551322,
 571.5120296196748,
 571.3536558034787,
 571.338778659387,
 571.2460065505928,
 571.233752504174,
 571.1917366349062,
 571.1777306583302,
 571.1777306583302,
 571.1645997433665,
 571.1427142142321,
 571.1182014259394,
 571.1120730644731,
 571.0429055683994,
 571.041154383815,
 571.0192641233743,
 571.0148859705848,
 571.0113834241836,
 570.9422037299397,
 570.9308189264265,
 570.9255643251579,
 570.924688553578,
 570.9045454364503,
 570.9036696326273,
 570.894035701898,
 570.8835257738657,
 570.8817741003824,
 570.84761539311,
 570.8300973144286,
 570.8222140036248,
 570.8187102749874,
 570.781043833798,
 570.7591435973671,
 570.7565155125257,
 570.7004468195202,
 570.6654010889393,
 570.6426202098824,
 570.64086

In [17]:
def compare_representations(testarr_one_sample, testarr_waiting_to_compare,testarr_waiting_to_compare_label, MODEL, similarity_method = 'minkowski_similarity', minkowski_power = 2):
    arr_KNN_from_same_class_ratio = []
    arr_KNN_max_class_label = []
    testarr_waiting_to_compare = testarr_waiting_to_compare[:5000]
    testarr_waiting_to_compare_label = testarr_waiting_to_compare_label[:5000].reshape(5000)
    for j in range(500):
        #print("current sample: ", j)
        k = 50
        testarr_one_sample_1 = [testarr_one_sample[j]]
        result_l2 = get_KNN_in_pixel_representation(testarr_one_sample_1, testarr_waiting_to_compare,
                                                    testarr_waiting_to_compare_label, similarity_method = 'minkowski_similarity', minkowski_power = 2)
        arr_KNN_from_same_class_ratio.append(result_l2[4])
        arr_KNN_max_class_label.append(result_l2[5])
    #print(np.mean(arr_KNN_from_same_class_ratio))
    pred_labels = MODEL.predict(testarr_one_sample[:500])
    import pandas as pd
    df = pd.DataFrame({'arr_KNN_max_class_label':arr_KNN_max_class_label,
                   'arr_KNN_from_same_class_ratio':arr_KNN_from_same_class_ratio,
                   'predicted_label':find_statistics(pred_labels)[2],
                  'predicted_prob':find_statistics(pred_labels)[1],})
    return arr_KNN_from_same_class_ratio, arr_KNN_max_class_label, df

In [18]:
compare_representations(C_x_train,mnist_train_RGB_x,M_train_y,M_VGG_Model1)[2]

Unnamed: 0,arr_KNN_max_class_label,arr_KNN_from_same_class_ratio,predicted_label,predicted_prob
0,9,0.24,4,0.929314
1,2,0.26,4,0.936445
2,0,0.24,7,0.826902
3,3,0.24,7,0.581013
4,7,0.40,4,0.736561
5,2,0.28,4,0.719364
6,0,0.32,7,0.977451
7,0,0.26,3,0.535689
8,4,0.28,2,0.355403
9,2,0.22,7,0.978295


In [15]:
# pixel representation
k = 50
testarr_one_sample_2 = [mnist_train_RGB_x[0]]
testarr_waiting_to_compare_2 = [C_x_train[i] for i in range(5000)]
testarr_waiting_to_compare_label_2 = C_y_train[:5000].reshape(5000)

In [16]:
result_l2_2 = get_KNN_in_pixel_representation(testarr_one_sample_2, testarr_waiting_to_compare_2, testarr_waiting_to_compare_label_2, similarity_method = 'minkowski_similarity', minkowski_power = 2)

In [29]:
result_l2_2[3]

Counter({0: 16, 2: 10, 3: 5, 9: 1, 6: 5, 8: 5, 4: 3, 1: 1, 5: 4})

In [30]:
result_l2_2[4]

0.32

In [31]:
# The similarity between the testarr_one_sample and testarr_waiting_to_compare
sorted(result_l2_2[0],reverse = True)

[637.5523507916821,
 626.76630413576,
 622.0112539174834,
 621.6823947965713,
 618.6840873984072,
 617.7369990538044,
 616.644143732834,
 615.3429937847671,
 610.7470834969251,
 608.2902267832354,
 607.720330415233,
 607.5541128162988,
 607.1326049554579,
 606.2821125515744,
 606.0033003210461,
 605.9009820094369,
 605.5551172271604,
 604.4683614549234,
 603.7557453142786,
 603.5983764060337,
 603.264452789985,
 602.7105441254533,
 600.6546428689285,
 600.1741413956453,
 600.0549974793977,
 599.9091597900468,
 599.769122246219,
 599.7632866389872,
 599.7574509749754,
 599.7516152541817,
 599.6649064269144,
 599.5681779414248,
 599.1719285814381,
 599.1393827816696,
 599.0158595563227,
 598.8722735275027,
 598.8180024013974,
 598.7127858998837,
 598.5423961592028,
 598.5031328238808,
 598.4504992060747,
 598.3761358877875,
 598.3076131890684,
 598.2967491136819,
 598.1805747431122,
 598.0158860766159,
 597.7767141667531,
 597.7633310935023,
 597.6754972390954,
 597.5809568585665,
 597.4

In [20]:
compare_representations(mnist_train_RGB_x,C_x_train,C_y_train,C_VGG_Model1)[2]

Unnamed: 0,arr_KNN_max_class_label,arr_KNN_from_same_class_ratio,predicted_label,predicted_prob
0,0,0.38,2,0.541207
1,0,0.36,2,0.998652
2,0,0.38,0,0.998847
3,0,0.36,0,0.801601
4,0,0.34,2,0.999509
5,0,0.34,2,0.966314
6,0,0.34,2,0.997170
7,0,0.36,1,0.980144
8,0,0.34,0,0.973287
9,0,0.38,0,0.999313
