In [None]:
###############
## Libraries ##
###############

import tensorflow as tf
import matplotlib.pyplot as plt 
from tensorflow.keras import datasets, layers, models, losses

import saliency
from matplotlib import cm

from keras.callbacks import ModelCheckpoint

%matplotlib inline
import numpy as np

In [None]:
####################################################################################
## This cell is for selecting the dataset --- Digits or Fashion (MNIST toy world) ##
####################################################################################
# Loads the chosen dataset, scales pixels to [0, 1], holds out a validation split,
# adds a channel axis for the CNN and one-hot encodes the validation/test labels.
# y_train is deliberately left as integer labels; the training cell encodes it.

DATASET = '_DigitMNIST' 
# DATASET = '_FashionMNIST'
REGIME = '_TRAINED_' # '_RANDOM_'

if DATASET == '_DigitMNIST':
    # Load the digit-mnist pre-shuffled train data and test data
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() #digit_mnist

    # Text label for each class index (index k is the digit k)
    labels = [str(digit) for digit in range(10)]

else:
    # Any other DATASET value falls through to fashion-mnist
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data() #fashion_mnist

    # Text label for each class index
    labels = ["T-shirt",      # index 0
              "Trouser",      # index 1
              "Pullover",     # index 2
              "Dress",        # index 3
              "Coat",         # index 4
              "Sandal",       # index 5
              "Shirt",        # index 6
              "Sneaker",      # index 7
              "Bag",          # index 8
              "Ankle boot"]   # index 9

# Raw shapes, reported once for whichever dataset was loaded
# (previously printed both inside the branch and again here)
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Keep references to the full integer label vectors (taken before the validation split)
y_train_labels = y_train
y_test_labels = y_test

# Print the number of training and test datasets
print(x_train.shape[0], 'train set')
print(x_test.shape[0], 'test set')

# Scale pixel intensities from [0, 255] to [0, 1]
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Further break training data into train / validation sets:
# the first 5000 samples become the validation set, the rest stay in train
(x_train, x_valid) = x_train[5000:], x_train[:5000] 
(y_train, y_valid) = y_train[5000:], y_train[:5000]

# Reshape input data from (28, 28) to (28, 28, 1)
w, h = 28, 28
x_train = x_train.reshape(x_train.shape[0], w, h, 1)
x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1)
x_test = x_test.reshape(x_test.shape[0], w, h, 1)

# One-hot encode the validation and test labels (10 classes)
y_valid = tf.keras.utils.to_categorical(y_valid, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Image index into the training split that remains after the validation
# hold-out, i.e. any value from 0 to x_train.shape[0] - 1
img_index = 5
# y_train contains the integer labels, ranging from 0 to 9
label_index = y_train[img_index]
# Print the label, for example 2 Pullover
print ("y = " + str(label_index) + " " +(labels[label_index]))
# Show one of the images from the training dataset; the trailing channel axis
# is squeezed away, matching how the test images are displayed further down
plt.imshow(np.squeeze(x_train[img_index]))
plt.show()

In [None]:
################################################################
## This cell runs the serial reproduction training procedure. ##
################################################################
# A fresh CNN is trained MAX_ITER times. Iteration 0 fits the one-hot labels;
# every later iteration fits the softmax outputs (y_hat) that the previous
# iteration's model produced on the training images.
import os

# Number of iterations in the serial reproduction
MAX_ITER = 25
# Number of epochs per training run
EPOCHS = 10
# Every checkpoint and saved .npy array of this chain is written here
WEIGHTS_DIR = 'weights/weights_digits_6/'
os.makedirs(WEIGHTS_DIR, exist_ok=True)


def build_cnn():
    """Return a newly initialised, compiled small CNN for 28x28x1 inputs and 10 classes."""
    model = tf.keras.Sequential()

    # Must define the input shape in the first layer of the neural network
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    # define optimization and energy parameters
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


for iteration in range(0, MAX_ITER):
    # If iteration is seed, train on original target vectors, else, train on y_hat from time t-1.
    # The targets live in their own variable so that y_train keeps its integer
    # labels and this cell can be re-run without re-running the dataset cell.
    if iteration == 0:
        # One-hot encode the labels
        train_targets = tf.keras.utils.to_categorical(y_train, 10)
        mpth = 'model.weights.best.hdf5'
        y_hat_test_name = 'y_hat_test_seed'
        y_hat_train_name = 'y_hat_train_seed'
    else:
        # Key step: set new targets as y_hat
        train_targets = y_hat
        mpth = 'model.weights.best.' + 'iter' + str(iteration) + '.hdf5'
        y_hat_test_name = 'y_hat_test_' + 'iter' + str(iteration)
        y_hat_train_name = 'y_hat_train_' + 'iter' + str(iteration)

    # A new model per iteration, so no weights carry over along the chain
    model = build_cnn()

    # Save a checkpoint after every epoch (save_best_only=False), so the file
    # left on disk after fit() holds the last epoch's model
    checkpointer = ModelCheckpoint(filepath=WEIGHTS_DIR + mpth, verbose=1, save_best_only=False) #True
    # Train the model
    model.fit(x_train,
              train_targets,
              batch_size=64,
              epochs=EPOCHS,
              validation_data=(x_valid, y_valid),
              callbacks=[checkpointer])

    # Softmax outputs of the freshly trained model
    y_hat = model.predict(x_train) #feed back serial reproduction targets
    y_hat_test = model.predict(x_test)

    # Reload the checkpoint written during training before scoring
    model.load_weights(WEIGHTS_DIR + mpth)
    # Evaluate the model on test set
    score = model.evaluate(x_test, y_test, verbose=0)
    # Print test accuracy
    print('\n', 'Test accuracy:', score[1])

    # Save results for each iteration in the serial reproduction chain.
    # The "train" file stores the targets this iteration was trained ON (one-hot
    # labels for the seed, the previous model's outputs afterwards); the "test"
    # file stores this iteration's predictions on the test images.
    np.save(WEIGHTS_DIR + y_hat_train_name + DATASET + REGIME + '.npy', train_targets)
    print(WEIGHTS_DIR + y_hat_train_name)
#     np.save('weights/' + y_hat_test_name + DATASET + REGIME + '.npy', y_test)
    np.save(WEIGHTS_DIR + y_hat_test_name + DATASET + REGIME + '.npy', y_hat_test)
    print(WEIGHTS_DIR + y_hat_test_name)


In [None]:
###########################################################
## Visualize some image examples and cnn classifications ##
###########################################################

rdn = range(25,50)
exs = enumerate(rdn)

%matplotlib inline
# Plot a random sample of 10 test images, their predicted labels and ground truth
figure = plt.figure(figsize=(20, 8))
for i, index in exs:
    ax = figure.add_subplot(5, 5, i + 1, xticks=[], yticks=[])
    # Display each image
    ax.imshow(np.squeeze(x_test[index]))
    predict_index = np.argmax(y_hat_test[index])
    true_index = np.argmax(y_test[index])
    # Set the title for each image
    ax.set_title("{} ({})".format(labels[predict_index], 
                                  labels[true_index]),
                                  color=("green" if predict_index == true_index else "red"))
plt.show()    

In [None]:
##################################################################################################
## Read prediction softmax layer activations for all test set images, and across all iterations ##
##################################################################################################

# y_hat_arr is laid out as (test image, iteration, class)
y_hat_arr = np.zeros((y_test.shape[0], MAX_ITER, len(labels)))
# Allocated here but not filled in by this cell
y_hat_mu = np.zeros((MAX_ITER, len(labels), len(labels)))

# Read in all test y_hat for all iterations and store into y_hat_arr
# NOTE(review): files are read straight from 'weights/', not from the
# 'weights/weights_digits_*/' folders used elsewhere -- confirm this is the intended run
for i in range(MAX_ITER):
    # Iteration 0 is stored under the "seed" name, later ones as "iter<i>"
    y_hat_test_name = 'y_hat_test_seed' if i == 0 else 'y_hat_test_' + 'iter' + str(i)
    # Load test set softmax outputs
    yh = np.load('weights/' + y_hat_test_name + DATASET + REGIME + '.npy')
    # Store into y_hat_arr
    y_hat_arr[:, i, :] = yh
    

# Compile Softmax Outputs into Matrices

In [None]:
###########################################################################################
## Read prediction softmax layer activations for all test set images, and all iterations ##
###########################################################################################

# just including this so I don't need to rerun cell 3 for now!
MAX_ITER = 25


# Build arrays of dimensions: N images X L labels X P iterations

y_hat_train_arr = np.zeros([y_train.shape[0], len(labels), MAX_ITER])
y_hat_test_arr = np.zeros([y_test.shape[0], len(labels), MAX_ITER])

for i in range(MAX_ITER):
    # Iteration 0 is stored under the "seed" name, later ones as "iter<i>"
    if i == 0:
        y_hat_train_name = 'y_hat_train_seed'
        y_hat_test_name = 'y_hat_test_seed'
    else:
        y_hat_train_name = 'y_hat_train_' + 'iter' + str(i)
        y_hat_test_name = 'y_hat_test_' + 'iter' + str(i)

    # Load the saved train targets and test set softmax outputs
    yhtr = np.load('weights/weights_digits_1/' + y_hat_train_name + DATASET + REGIME + '.npy')
    yhte = np.load('weights/weights_digits_1/' + y_hat_test_name + DATASET + REGIME + '.npy')

    y_hat_train_arr[:, :, i] = yhtr
    y_hat_test_arr[:, :, i] = yhte

# True class of every training image: the seed train file holds the one-hot
# targets, so argmax recovers the label (same result as np.nonzero(...)[1] on
# one-hot rows, but it always yields exactly one entry per image).
true_class_tr = np.argmax(y_hat_train_arr[:, :, 0], axis=1)
# The test seed file is written from model predictions by the training cell,
# not from one-hot labels, so the true test classes are taken from y_test.
true_class_te = np.argmax(y_test, axis=1)


print(y_hat_train_arr.shape)
# (55000, 10, 25) with the default 55,000-image train split and MAX_ITER = 25
print(y_hat_test_arr.shape)
# (10000, 10, 25)


# Visualize N least and most confusing images given rank

In [None]:
########################
## Visualize N images ##
########################

from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')
import matplotlib.backends.backend_pdf

def _plot_ranked_examples(pdf_path, softmax_stacks, images, heading):
    """Draw one four-panel figure per example, show it and append it to a PDF.

    Panels: the image, its softmax outputs (iterations x classes), the
    correlation matrix between iterations, and that matrix's eigenvalues.

    Parameters
    ----------
    pdf_path : str
        Destination PDF; one page is written per example.
    softmax_stacks : array, indexed as [example, class, iteration]
        Softmax outputs of the examples to draw.
    images : array
        The matching images, in the same order as ``softmax_stacks``.
    heading : str
        Line printed before the figures.
    """
    print(heading)
    # The context manager closes the PDF even if drawing fails part-way
    with matplotlib.backends.backend_pdf.PdfPages(pdf_path) as pdf:
        for current, image in zip(softmax_stacks, images):
            figure = plt.figure(figsize=(40, 40))

            # plot image (channel axis squeezed away for display)
            ax1 = figure.add_subplot(8, 8, 1, xticks=[], yticks=[])
            ax1.imshow(np.squeeze(image))
            ax1.set_title("Image")

            # plot weights graph: rows are iterations, columns are classes
            ax2 = figure.add_subplot(8, 8, 2)
            im2 = ax2.imshow(current.T, cmap='Wistia')
            cax = make_axes_locatable(ax2).append_axes('right', size='5%', pad=0.05)
            cbar = figure.colorbar(im2, cax=cax, orientation='vertical', ticks=[0, 1])
            cbar.ax.set_yticklabels(['0', '1'])
            ax2.set(xlabel='Classes', ylabel='Iterations', title='Softmax Outputs')
            ax2.set_xticks(ticks=list(range(len(labels))))
            ax2.set_xticklabels(labels)

            # plot correlation graph: correlation between the per-iteration
            # class distributions (one variable per iteration)
            corr_arr = np.corrcoef(current.T)
            ax3 = figure.add_subplot(8, 8, 3)
            im3 = ax3.imshow(corr_arr, cmap='Wistia')
            cax = make_axes_locatable(ax3).append_axes('right', size='5%', pad=0.05)
            # The colorbar is bound to the correlation image itself (it used to
            # reuse the softmax image and a fixed 0/1 scale)
            figure.colorbar(im3, cax=cax, orientation='vertical')
            ax3.set_title("Correlation Matrix")

            # plot eigenvalues graph: the matrix is symmetric, so eigvalsh
            # returns real eigenvalues in ascending order; reverse them so the
            # curve really is sorted from largest to smallest
            eigs = np.linalg.eigvalsh(corr_arr)[::-1]
            num_nonzero = np.count_nonzero(np.around(eigs, 2))
            plural = "" if num_nonzero == 1 else "s"
            title_str = "Sorted Eigenvalues (" + str(num_nonzero) + " nonzero" + plural + ")"

            ax4 = figure.add_subplot(8, 8, 4)
            ax4.plot(eigs, marker='o')
            # Eigenvalues of an n x n correlation matrix sum to n, so n bounds the y-axis
            ax4.set(xlabel="PC Number", xlim=[0, len(eigs)], ylim=[0, len(eigs)], title=title_str)

            pdf.savefig(figure, bbox_inches='tight')
            plt.show()
            # Release the figure; many large figures are created per call
            plt.close(figure)


def visualize_N_images(CLASS, NUM_IMAGES, rank):
    """Plot the least and most confusing images of one class and save them to PDFs.

    Reads the notebook globals ``class_data`` (softmax outputs of the images of
    CLASS, indexed [image, class, iteration], set by the calling cell),
    ``x_train``, ``true_class_tr`` and ``labels``.

    Parameters
    ----------
    CLASS : int
        Class index; also selects the label used in headings and file names.
    NUM_IMAGES : int
        How many images to take from each end of ``rank``. If the class has
        fewer images, all of them are used.
    rank : array of int
        Indices into ``class_data`` ordered from least to most confusing.

    Writes ``least_confusing_<label>s.pdf`` and ``most_confusing_<label>s.pdf``
    under ``Outputs/MNIST_Nx4_Plots/``. The file names depend only on the
    class, so each ranking method overwrites the PDFs of the previous one.
    """
    import os

    out_dir = "Outputs/MNIST_Nx4_Plots/"
    os.makedirs(out_dir, exist_ok=True)
    class_name = str(labels[CLASS])

    # Clamp the request so slicing from either end never yields more (or, for
    # NUM_IMAGES == 0, unexpectedly all) entries than asked for
    rank = np.asarray(rank)
    count = max(0, min(NUM_IMAGES, len(rank)))
    least_idx = rank[:count]
    most_idx = rank[len(rank) - count:]

    # extract the images of CLASS from the overall training images; they line
    # up with class_data because both are selected with the same mask
    class_images = x_train[np.where(true_class_tr == CLASS)]

    _plot_ranked_examples(out_dir + "least_confusing_" + class_name + "s.pdf",
                          class_data[least_idx],
                          class_images[least_idx],
                          str(NUM_IMAGES) + " least confusing " + class_name + "s")

    _plot_ranked_examples(out_dir + "most_confusing_" + class_name + "s.pdf",
                          class_data[most_idx],
                          class_images[most_idx],
                          str(NUM_IMAGES) + " most confusing " + class_name + "s")

# Rank Images by Difference to Binary Seed Vector and Visualize

In [None]:
##############################################
## Rank by difference to binary seed vector ##
##############################################
print("Rank method: Binary seed vector difference")

for i in range(10):
    # class analysed in this pass
    CLASS = i

    # softmax outputs of every training image whose true class is CLASS;
    # squeeze() removes the extra leading axis the tuple index introduces
    class_mask = np.where(true_class_tr == CLASS)
    class_data = y_hat_train_arr[class_mask, :, :].squeeze()

    # true-class probability in the seed (iteration 0) and in the final iteration
    seed_prob = class_data[:, CLASS, 0]
    final_prob = class_data[:, CLASS, MAX_ITER - 1]

    # how much probability the true class lost between seed and final iteration
    diff_arr = seed_prob - final_prob

    # image indices ordered by that loss, ascending (smallest loss first)
    seed_diff_rank = diff_arr.argsort()

    visualize_N_images(i, 100, seed_diff_rank)

# Rank images by entropy and Visualize

In [None]:
#####################
## Rank by entropy ##
#####################
print("Rank method: Entropy")

from scipy.stats import entropy

for i in range(10):
    # choose which class to analyze
    CLASS = i

    # softmax outputs of every training image whose true class is CLASS
    class_data = y_hat_train_arr[np.where(true_class_tr == CLASS), :, :].squeeze()

    # With two distributions scipy's entropy() returns the relative entropy
    # (KL divergence); every image is compared with the first image of the class.
    # NOTE(review): axis=1 is the iteration axis of the (class, iteration)
    # slices, so the divergence is taken along iterations -- confirm intended.
    reference = class_data[0, :, :]
    # The comprehension index is kept separate from the class index: the old
    # inner loop reused `i`, so the call below received an image index
    # instead of the class.
    ents = np.asarray([
        entropy(class_data[img_idx, :, :], reference, base=2, axis=1)
        for img_idx in range(class_data.shape[0])
    ])

    # rank images by entropy, averaged over the class axis of each image
    ents_rank = np.argsort(np.mean(ents, axis=1))

    visualize_N_images(CLASS, 100, ents_rank)

# Incorporate monotonicity and Visualize

In [None]:
############################################
## Rank by monotonicity * absolute change ##
############################################

for i in range(10):
    # class analysed in this pass
    CLASS = i

    # softmax outputs of every training image whose true class is CLASS
    class_data = y_hat_train_arr[np.where(true_class_tr == CLASS), :, :].squeeze()

    # step-to-step change of the true-class probability across iterations
    diff_arr = np.diff(class_data, axis=2)[:, CLASS, :]
    # a step counts as "negative" when the probability dropped
    neg_where = diff_arr < 0
    neg_count = neg_where.sum(axis=1)
    # fraction of steps in which the true-class probability decreased
    n_steps = neg_where.shape[1]
    oscillation_factor = neg_count / n_steps

    # true-class probability lost between the seed and the final iteration
    absolute_diff_arr = class_data[:, CLASS, 0] - class_data[:, CLASS, MAX_ITER - 1]

    # weight the overall loss by the fraction of decreasing steps
    confusion = absolute_diff_arr * oscillation_factor

    # image indices ordered by that score, ascending
    monotonicity_rank = confusion.argsort()

    visualize_N_images(i, 100, monotonicity_rank)


# Correlation between methods

In [None]:
###################################
## Seed_diff/Entropy Correlation ##
###################################
from scipy.stats import spearmanr 

# The *_rank arrays come from np.argsort: position k holds the index of the
# image with the k-th smallest score. Spearman's rho must compare, per image,
# the rank each method assigned to it, so every ordering is inverted first
# (argsort of a permutation gives the position of each image in it).
# Note: the *_rank variables only hold the class processed last by each
# ranking cell, so this compares the methods on that single class.
seed_diff_pos = np.argsort(seed_diff_rank)
entropy_pos = np.argsort(ents_rank)
monotonicity_pos = np.argsort(monotonicity_rank)

print("seed_diff_rank & entropy: ", spearmanr(seed_diff_pos, entropy_pos))
print("\nseed_diff_rank & monotonicity: ", spearmanr(seed_diff_pos, monotonicity_pos))
print("\nentropy & monotonicity: ", spearmanr(entropy_pos, monotonicity_pos))


In [None]:
#################################################
## Correlation between different training runs ##
#################################################
from scipy.stats import pearsonr 
import random
MAX_ITER = 25

# Arrays are laid out as (image, class, iteration); suffix _1 / _2 marks the run
y_hat_train_arr_1 = np.zeros([y_train.shape[0], len(labels), MAX_ITER])
y_hat_test_arr_1 = np.zeros([y_test.shape[0], len(labels), MAX_ITER])

y_hat_train_arr_2 = np.zeros([y_train.shape[0], len(labels), MAX_ITER])
y_hat_test_arr_2 = np.zeros([y_test.shape[0], len(labels), MAX_ITER])

for i in range(MAX_ITER):
    # Iteration 0 is stored under the "seed" name, later ones as "iter<i>"
    suffix = 'seed' if i == 0 else 'iter' + str(i)
    y_hat_train_name = 'y_hat_train_' + suffix
    y_hat_test_name = 'y_hat_test_' + suffix

    # Saved arrays of the first run ...
    yhtr_1 = np.load('weights/weights_digits_1/' + y_hat_train_name + DATASET + REGIME + '.npy')
    yhte_1 = np.load('weights/weights_digits_1/' + y_hat_test_name + DATASET + REGIME + '.npy')

    # ... and of the second run
    yhtr_2 = np.load('weights/weights_digits_6/' + y_hat_train_name + DATASET + REGIME + '.npy')
    yhte_2 = np.load('weights/weights_digits_6/' + y_hat_test_name + DATASET + REGIME + '.npy')

    # The first time through, use binary weight vectors to save correct class array.
    # NOTE(review): this yields one class per image only if the seed arrays are
    # one-hot; the *_te_* results are not used further down in this cell.
    if i == 0:
        true_class_tr_1 = np.nonzero(yhtr_1)[1]
        true_class_te_1 = np.nonzero(yhte_1)[1]

        true_class_tr_2 = np.nonzero(yhtr_2)[1]
        true_class_te_2 = np.nonzero(yhte_2)[1]

    y_hat_train_arr_1[:, :, i] = yhtr_1
    y_hat_test_arr_1[:, :, i] = yhte_1

    y_hat_train_arr_2[:, :, i] = yhtr_2
    y_hat_test_arr_2[:, :, i] = yhte_2

# Correlation store of N classes X M images X L iterations, where M is the size
# of the largest class; only the first len(confusing_idxs) rows per class are filled
_, counts = np.unique(true_class_tr_1, return_counts=True)
max_count = np.amax(counts)

correlation_arr = np.zeros([10, max_count, MAX_ITER])

for i in range(10):
    CLASS = i
    print("\nCLASS: " + str(CLASS) + "\n")

    # np.where(true_class_tr_1 == CLASS) and np.where(true_class_tr_2 == CLASS) both
    # identify the same indices for the class data in the larger training data
    class_data_1 = y_hat_train_arr_1[np.where(true_class_tr_1 == CLASS), :, :].squeeze()
    class_data_2 = y_hat_train_arr_2[np.where(true_class_tr_2 == CLASS), :, :].squeeze()

    # true-class probability lost between the seed and the final iteration, per run
    final_iter = MAX_ITER - 1
    diff_arr_1 = class_data_1[:, CLASS, 0] - class_data_1[:, CLASS, final_iter]
    diff_arr_2 = class_data_2[:, CLASS, 0] - class_data_2[:, CLASS, final_iter]
    seed_diff_rank_1 = diff_arr_1.argsort()
    seed_diff_rank_2 = diff_arr_2.argsort()

    # get the indices of the most confusing 1000 images for each class (as ranked by run 1)
    confusing_idxs = seed_diff_rank_1[-1000:]

    for j, idx in enumerate(confusing_idxs):
        # softmax outputs of this image in both runs, one row per iteration
        outputs_1 = class_data_1[idx, :, :].T
        outputs_2 = class_data_2[idx, :, :].T

        image_corr_sum = 0
        for iter_num, (weights_1, weights_2) in enumerate(zip(outputs_1, outputs_2)):
            # correlation between the two runs' class distributions at this iteration
            corr = np.corrcoef(weights_1, weights_2)[0, 1]
            correlation_arr[i, j, iter_num] = corr
            image_corr_sum += corr

        # mean correlation of this image across all iterations
        image_corr_avg = image_corr_sum / outputs_1.shape[0]
        print(image_corr_avg)


In [None]:
# Shape of the correlation store, then one mean per class over its first
# 1000 image rows and all iterations
print(correlation_arr.shape)
for i in range(len(correlation_arr)):
    print(correlation_arr[i, :1000, :].mean())

In [None]:
###################################################################################
## Correlation between different training runs where rankings have been shuffled ##
###################################################################################

# Same layout as correlation_arr: class X image X iteration
correlation_arr_shuffled = np.zeros([10, max_count, MAX_ITER])

for i in range(10):
    CLASS = i
    print("\nCLASS: " + str(CLASS) + "\n")

    # np.where(true_class_tr_1 == CLASS) and np.where(true_class_tr_2 == CLASS) both
    # identify the same indices for the class data in the larger training data
    class_data_1 = y_hat_train_arr_1[np.where(true_class_tr_1 == CLASS), :, :].squeeze()
    class_data_2 = y_hat_train_arr_2[np.where(true_class_tr_2 == CLASS), :, :].squeeze()

    # true-class probability lost between the seed and the final iteration, per run
    final_iter = MAX_ITER - 1
    diff_arr_1 = class_data_1[:, CLASS, 0] - class_data_1[:, CLASS, final_iter]
    diff_arr_2 = class_data_2[:, CLASS, 0] - class_data_2[:, CLASS, final_iter]
    seed_diff_rank_1 = diff_arr_1.argsort()
    seed_diff_rank_2 = diff_arr_2.argsort()

    # get the indices of the most confusing 1000 images for each class (as ranked by run 1)
    confusing_idxs = seed_diff_rank_1[-1000:]

    # Index-array selection produces copies, so the shuffles below leave
    # class_data_1 / class_data_2 untouched
    class_data_1_confusing = class_data_1[confusing_idxs]
    class_data_2_confusing = class_data_2[confusing_idxs]

    # Shuffle each run's images independently so row j no longer refers to the
    # same image in both runs
    np.random.shuffle(class_data_1_confusing)
    np.random.shuffle(class_data_2_confusing)

    for j, (stack_1, stack_2) in enumerate(zip(class_data_1_confusing, class_data_2_confusing)):
        # softmax outputs of the j-th shuffled image in each run, one row per iteration
        outputs_1 = stack_1.T
        outputs_2 = stack_2.T

        image_corr_sum = 0
        for iter_num, (weights_1, weights_2) in enumerate(zip(outputs_1, outputs_2)):
            corr = np.corrcoef(weights_1, weights_2)[0, 1]
            correlation_arr_shuffled[i, j, iter_num] = corr
            image_corr_sum += corr

        # mean correlation of this (mismatched) pair across all iterations
        image_corr_avg = image_corr_sum / outputs_1.shape[0]
        print(image_corr_avg)


In [None]:
# Shape of the shuffled correlation store, then one mean per class over its
# first 1000 image rows and all iterations
print(correlation_arr_shuffled.shape)
for i in range(len(correlation_arr_shuffled)):
    print(correlation_arr_shuffled[i, :1000, :].mean())

# Convergence Plots

In [None]:
#############################################
## Examine softmax weights for convergence ##
#############################################

# find norm of difference between each iteration of softmax outputs for all images:
# difference along the iteration axis, then the norm over the class axis
diff_arr = np.diff(y_hat_train_arr, axis=-1)
norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

# average across all images
avg_change = np.mean(norm_diff_arr, axis=0)

# The x-axis follows the number of iterations actually loaded instead of a
# hard-coded 50, so the curve fills the plot for any MAX_ITER
n_iterations = y_hat_train_arr.shape[-1]

# visualize
figure = plt.figure(figsize=(20, 8))
_ = plt.plot(avg_change)
_ = plt.title("Avg change over iterations")
_ = plt.xlabel("Iterations")
_ = plt.ylabel("Norm of softmax output differences by iteration")
_ = plt.xticks(np.arange(0, n_iterations, step=1))
_ = plt.xlim(0, n_iterations)


In [None]:
#############################################
## Examine softmax weights for convergence ##
#############################################

# distance of every iteration's softmax output from the seed (iteration 0)
# vector: subtract the seed along the iteration axis, then take the norm over classes
seed_vector_arr = y_hat_train_arr[:, :, 0]
diff_arr = y_hat_train_arr - seed_vector_arr[..., np.newaxis]
norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

# average across all images
avg_change = norm_diff_arr.mean(axis=0)

# visualize
figure = plt.figure(figsize=(20, 8))
ax = figure.gca()
ax.plot(avg_change)
ax.set_title("Avg change over iterations")
ax.set_xlabel("Iterations")
ax.set_ylabel("Norm of softmax output differences from seed vector")
ax.set_xticks(np.arange(0, 25, step=1))
ax.set_xlim(0, 25)
# pin only the lower bound; the upper bound stays automatic
ax.set_ylim(0)

In [None]:
##########################
## Convergence by digit ##
##########################
import matplotlib.backends.backend_pdf

# The x-axis follows the number of iterations actually loaded instead of a hard-coded 50
n_iterations = y_hat_train_arr.shape[2]

# One page per class; the context manager closes the PDF even if a plot fails
with matplotlib.backends.backend_pdf.PdfPages("Outputs/convergence_by_class.pdf") as pdf:
    for i in range(y_hat_train_arr.shape[1]):
        CLASS = i
        # softmax outputs of every training image whose true class is CLASS
        class_data = y_hat_train_arr[np.where(true_class_tr == CLASS), :, :].squeeze()

        # distance of every iteration from the seed (iteration 0) vector
        seed_vector_arr =  class_data[:, :, 0]
        diff_arr = class_data - seed_vector_arr[:, :, None]
        norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

        # average across the images of this class
        avg_change_by_class = np.mean(norm_diff_arr, axis=0)

        figure = plt.figure(figsize=(20, 8))
        _ = plt.plot(avg_change_by_class)
        _ = plt.title("Avg change over iterations, class = " + str(CLASS))
        _ = plt.xlabel("Iterations")
        _ = plt.ylabel("Norm of softmax output differences from seed vector")
        _ = plt.xticks(np.arange(0, n_iterations, step=1))
        _ = plt.xlim(0, n_iterations)
        _ = plt.ylim(0,)

        pdf.savefig(figure, bbox_inches='tight')

In [None]:
##############################################################
## Convergence over 4 rounds of 50 iterations, AVG + digits ##
##############################################################
# Note: this cell overwrites the globals y_hat_train_arr, y_hat_test_arr,
# true_class_tr and true_class_te used by the earlier analysis cells.

pdf = matplotlib.backends.backend_pdf.PdfPages("Outputs/convergence_50_iterations_avgs_and_classes.pdf")
for a in range(2, 6):
    # NOTE(review): `a` is not used in the paths below, so every pass loads the
    # same 'weights/weights_fashion_1/' run and produces the same page; the
    # averages-only cell builds its path from `a` -- confirm which run folders
    # were intended here.

    # load in correct weights
    y_hat_train_arr = np.zeros([y_train.shape[0], len(labels), MAX_ITER])
    y_hat_test_arr = np.zeros([y_test.shape[0], len(labels), MAX_ITER])

    for i in range(MAX_ITER):
        # Iteration 0 is stored under the "seed" name, later ones as "iter<i>"
        if i == 0:
            y_hat_train_name = 'y_hat_train_seed'
            y_hat_test_name = 'y_hat_test_seed'

        else:
            y_hat_train_name = 'y_hat_train_' + 'iter' + str(i)
            y_hat_test_name = 'y_hat_test_' + 'iter' + str(i)

        # Load the saved train targets and test set softmax outputs
        yhtr = np.load('weights/weights_fashion_1/' + y_hat_train_name + DATASET + REGIME + '.npy')
        yhte = np.load('weights/weights_fashion_1/' + y_hat_test_name + DATASET + REGIME + '.npy')  

        # The first time through, use binary weight vectors to save correct class array
        # NOTE(review): yields one class per image only if the seed arrays are one-hot
        if i == 0:
            true_class_tr = np.nonzero(yhtr)[1]
            true_class_te = np.nonzero(yhte)[1]

        y_hat_train_arr[:, :, i] = yhtr
        y_hat_test_arr[:, :, i] = yhte

    # find norm of difference between each iteration and the seed vector
    seed_vector_arr =  y_hat_train_arr[:, :, 0]
    diff_arr = y_hat_train_arr - seed_vector_arr[:, :, None]
    norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

    # average across all images
    avg_change = np.mean(norm_diff_arr, axis=0)

    # visualize: thick line for the overall average, one thin line per class
    figure = plt.figure(figsize=(20, 8))
    _ = plt.plot(avg_change, linewidth=5, label="average")

    for b in range(y_hat_train_arr.shape[1]):
        CLASS = b
        # softmax outputs of every training image whose true class is CLASS
        class_data = y_hat_train_arr[np.where(true_class_tr == CLASS), :, :].squeeze()

        seed_vector_arr =  class_data[:, :, 0]
        diff_arr = class_data - seed_vector_arr[:, :, None]
        norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

        avg_change_by_class = np.mean(norm_diff_arr, axis=0)
        _ = plt.plot(avg_change_by_class, label=labels[b])

    _ = plt.title("Avg change over iterations")
    _ = plt.xlabel("Iterations")
    _ = plt.ylabel("Norm of softmax output differences from seed vector")
    # The x-axis follows MAX_ITER (the number of iterations loaded) instead of a hard-coded 50
    _ = plt.xticks(np.arange(0, MAX_ITER, step=1))
    _ = plt.xlim(0, MAX_ITER)
    _ = plt.ylim(0,)
    _ = plt.legend(loc='upper right')
    # Write the page before showing, while the figure is guaranteed to be open
    pdf.savefig(figure)
    _ = plt.show()

pdf.close()

In [None]:
###########################################################
## Convergence over 4 rounds of 50 iterations, only AVGs ##
###########################################################
# Note: this cell overwrites the globals y_hat_train_arr, y_hat_test_arr,
# true_class_tr and true_class_te used by the earlier analysis cells.


pdf = matplotlib.backends.backend_pdf.PdfPages("Outputs/convergence_50_iterations_avgs.pdf")
figure = plt.figure(figsize=(20, 8))
for a in range(2, 6):
    # One curve per run folder: 'weights2/' ... 'weights5/'

    # load in correct weights
    y_hat_train_arr = np.zeros([y_train.shape[0], len(labels), MAX_ITER])
    y_hat_test_arr = np.zeros([y_test.shape[0], len(labels), MAX_ITER])

    for i in range(MAX_ITER):
        # Iteration 0 is stored under the "seed" name, later ones as "iter<i>"
        if i == 0:
            y_hat_train_name = 'y_hat_train_seed'
            y_hat_test_name = 'y_hat_test_seed'

        else:
            y_hat_train_name = 'y_hat_train_' + 'iter' + str(i)
            y_hat_test_name = 'y_hat_test_' + 'iter' + str(i)

        # Load the saved train targets and test set softmax outputs
        yhtr = np.load('weights' + str(a) + '/' + y_hat_train_name + DATASET + REGIME + '.npy')
        yhte = np.load('weights' + str(a) + '/' + y_hat_test_name + DATASET + REGIME + '.npy')  

        # The first time through, use binary weight vectors to save correct class array
        # NOTE(review): yields one class per image only if the seed arrays are one-hot
        if i == 0:
            true_class_tr = np.nonzero(yhtr)[1]
            true_class_te = np.nonzero(yhte)[1]

        y_hat_train_arr[:, :, i] = yhtr
        y_hat_test_arr[:, :, i] = yhte

    # find norm of difference between each iteration and the seed vector
    seed_vector_arr =  y_hat_train_arr[:, :, 0]
    diff_arr = y_hat_train_arr - seed_vector_arr[:, :, None]
    norm_diff_arr = np.linalg.norm(diff_arr, axis=1)

    # average across all images
    avg_change = np.mean(norm_diff_arr, axis=0)

    # add this run's curve to the shared figure (label counts runs from 0)
    _ = plt.plot(avg_change, label=("iteration: " + str(a - 2)))


_ = plt.title("Avg change over iterations")
_ = plt.xlabel("Iterations")
_ = plt.ylabel("Norm of softmax output differences from seed vector")
# The x-axis follows MAX_ITER (the number of iterations loaded) instead of a hard-coded 50
_ = plt.xticks(np.arange(0, MAX_ITER, step=1))
_ = plt.xlim(0, MAX_ITER)
_ = plt.ylim(0,)
_ = plt.legend(loc='upper left')
# Write the page before showing, while the figure is guaranteed to be open
pdf.savefig(figure)
_ = plt.show()

pdf.close()

# Scratchwork/incomplete cells below

In [None]:
# Wrapper function / custom loss function (scratch work)
#
# The original notes in this cell were pseudo-code that did not parse; they are
# condensed into one runnable definition so the notebook can be run top to bottom.

# vector of length N images, higher weights for more confusable images
confusion_rank = []

# TODO: define `weights` based on the serial reproduction results, one value per
# input image, with larger weights for images that get confused with the wrong
# class following serial reproduction.


def wrapper(weights):
    """Build a loss that weights the squared error before averaging it.

    Parameters
    ----------
    weights : tensor or array
        Multiplied element-wise with the squared difference between y_pred and
        y_true, so it must broadcast against their shape.
        NOTE(review): a loss function only ever sees one batch, so a vector
        with one weight per training image will not line up with it as-is;
        the `sample_weight` argument of `model.fit` may be the simpler route
        -- confirm the intended design.

    Returns
    -------
    custom_loss_1 : callable
        ``custom_loss_1(y_true, y_pred)``: the weighted squared error averaged
        over the last axis.
    """
    def custom_loss_1(y_true, y_pred):
        diff = tf.math.squared_difference(y_pred, y_true)  # squared difference
        loss = diff * weights  # elementwise multiply
        # Average the WEIGHTED error; the draft averaged `diff`, which
        # silently discarded the weights
        return tf.reduce_mean(loss, axis=-1)
    return custom_loss_1

In [None]:
# For every class, plot the average softmax output per iteration over the test
# images whose seed (iteration 0) vector marks that class with probability 1.0.
for CATEGORY in range(10):

    # select '1' for correct predictions, '0' for incorrect predictions, or '2' for all
    CORRECT = 2
    I = 0

    # softmax outputs of the final iteration
    final = y_hat_arr[:,MAX_ITER-1,:]
    # binarized predictions from final iteration: 1.0 at each image's argmax class
    final_bin = np.zeros(final.shape)
    final_bin[np.arange(final.shape[0]), np.argmax(final, axis=1)] = 1.0
    # 1.0 where the final iteration predicts CATEGORY, 0.0 elsewhere
    idx = final_bin[:,CATEGORY]

    # images whose seed vector assigns probability exactly 1.0 to CATEGORY
    idx0 = np.argwhere(y_hat_arr[:,0,CATEGORY] == 1.0)
    print(idx0.shape)

    if CORRECT == 1:
        # keep only images the final iteration still assigns to CATEGORY
        colors = plt.cm.winter(np.linspace(0,1,MAX_ITER))
        idx1 = np.flatnonzero(idx == 1.0)
        c = np.isin(idx0.ravel(), idx1)
        idx0 = idx0[c]
    elif CORRECT == 0:
        # keep only images the final iteration assigns to another class
        colors = plt.cm.cool(np.linspace(0,1,MAX_ITER))
        idx1 = np.flatnonzero(idx != 1.0)
        c = np.isin(idx0.ravel(), idx1)
        idx0 = idx0[c]
    else:
        # all images of the class: `colors` used to be left undefined on this
        # path, so it gets its own colormap
        colors = plt.cm.viridis(np.linspace(0,1,MAX_ITER))

    print(idx0.shape)

    if idx0.size == 0:
        # Nothing selected (e.g. the seed file does not hold one-hot vectors);
        # averaging an empty selection would only produce NaNs
        print("No images selected for " + labels[CATEGORY] + ", skipping plot")
        continue

    ##########################################################
    ## Draw the plot with the results across all iterations ##
    ##########################################################

    MRK = 100
    LNW = 5
    figure = plt.figure(figsize=(40, 40))
    ax = plt.subplot(111)
    # one line per iteration, including the final one (previously skipped)
    for i in range(MAX_ITER):
        plt.plot(range(10), np.mean(y_hat_arr[idx0,i,:],0).flatten(), "o:", color=colors[i], linewidth=LNW, markersize=MRK)
    plt.xticks(range(len(labels)),labels)
    plt.xticks(rotation = 90) 
    plt.xticks(fontsize=100)
    plt.yticks(fontsize=100)
    plt.xlabel('class', fontsize=120)
    plt.ylabel('Probability of class (average)', fontsize=120)
    plt.title('Average predictions by iteration: ' + labels[CATEGORY],fontsize=100)
    plt.show()  