In [None]:
###############
## Libraries ##
###############

import os

import matplotlib.pyplot as plt 
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from tensorflow import keras
from tensorflow.keras import datasets, layers, models, losses

# Training the Model

In [None]:
##############################################
## Load, partition, and resize MNIST Digits ##
##############################################
def loadData(path="/scratch/gpfs/eysu/src_data/mnist.npz", n_valid=5000):
    """Load MNIST from an ``.npz`` archive and split it into train / validation / test sets.

    Pixel arrays are cast to float32 and divided by 255, the first ``n_valid``
    training samples are held out as the validation split, and every image
    array is reshaped from (N, 28, 28) to (N, 28, 28, 1).

    Parameters
    ----------
    path : str, optional
        Location of the archive. It must contain the arrays ``x_train``,
        ``y_train``, ``x_test`` and ``y_test``.
    n_valid : int, optional
        Number of leading training samples moved into the validation split.

    Returns
    -------
    tuple
        ``(x_train, x_valid, x_test, y_train, y_valid, y_test)``. Labels are
        returned as loaded (not one-hot encoded); ``y_valid`` is squeezed.
    """
    # The context manager closes the archive's file handle once the arrays are read
    with np.load(path) as all_data:
        x_test = all_data['x_test']
        x_train = all_data['x_train']
        y_train = all_data['y_train']
        y_test = all_data['y_test']

    # Scale pixel values from [0, 255] down to [0, 1]
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # Hold out the first n_valid training samples for validation
    (x_train, x_valid) = x_train[n_valid:], x_train[:n_valid]
    (y_train, y_valid) = y_train[n_valid:], np.array(y_train[:n_valid]).squeeze()

    # Reshape input data from (28, 28) to (28, 28, 1)
    w, h = 28, 28
    x_train = x_train.reshape(x_train.shape[0], w, h, 1)
    x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1)
    x_test = x_test.reshape(x_test.shape[0], w, h, 1)

    return x_train, x_valid, x_test, y_train, y_valid, y_test

# Pretrain the model on a few of the labels

In [None]:
######################################################
## PRETRAINING STEP 1: isolate a subset of the data ##
######################################################

## Pretrain the model on incrementing slices of data
# NOTE: `pretrain` is defined in the "PRETRAINING STEP 2" cell further down;
# that cell has to be executed before this loop can run.

# The MNIST splits do not depend on the subset size, so read them from disk
# once instead of once per loop iteration.
x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()

for i in range(1,11):
    # set END_IDX to the size of the pretraining subset:
    # only samples whose label is strictly below END_IDX are kept
    END_IDX = i

    ###########################################
    ## Slice the train, valid, and test data ##
    ## based on the END_IDX                  ##
    ###########################################

    idx_train = np.where(y_train < END_IDX)
    y_train_subset = y_train[idx_train]
    x_train_subset = x_train[idx_train]

    idx_valid = np.where(y_valid < END_IDX)
    y_valid_subset = y_valid[idx_valid]
    x_valid_subset = x_valid[idx_valid]

    idx_test = np.where(y_test < END_IDX)
    y_test_subset = y_test[idx_test]
    x_test_subset = x_test[idx_test]

    # pretrain the model on the data subset and save weights
    pretrain(x_train_subset, y_train_subset, x_valid_subset, y_valid_subset, x_test_subset, y_test_subset, END_IDX)


In [None]:
###################################################
## PRETRAINING STEP 2: train data on data subset ##
###################################################
def _build_small_cnn():
    """Return the uncompiled small CNN: two conv/pool/dropout stages, a 256-unit dense layer, and a 10-way softmax."""
    model = tf.keras.Sequential()

    # Must define the input shape in the first layer of the neural network
    model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))) 
    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(256, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(10, activation='softmax'))
    return model


def pretrain(x_train_subset, y_train_subset, x_valid_subset, y_valid_subset, x_test_subset, y_test_subset, end_idx,
             epochs=10, save_root="/scratch/gpfs/eysu/low_shot_weights/"):
    """Train a fresh small CNN on a label subset and save its weights and outputs.

    Everything is written below ``save_root + str(end_idx) + "/"``:
    ``model.weights.best.pretrain.hdf5`` (checkpoint), ``y_hat_train_pretrain.npy``
    and ``y_hat_test_pretrain.npy``.

    Parameters
    ----------
    x_train_subset, x_valid_subset, x_test_subset : array
        Images fed to the network, whose first layer declares input shape (28, 28, 1).
    y_train_subset, y_valid_subset, y_test_subset : array
        Integer class labels; they are one-hot encoded to 10 classes here.
    end_idx : int
        Used only to name the output folder.
    epochs : int, optional
        Number of training epochs.
    save_root : str, optional
        Parent folder of the per-``end_idx`` output folders (must end with "/").
    """
    model = _build_small_cnn()

    # Save each set of weights under the relevant folder; create it up front so
    # neither the checkpoint callback nor np.save fails on a missing directory.
    save_path = save_root + str(end_idx) + "/"
    os.makedirs(save_path, exist_ok=True)

    # One-hot encode the integer labels (10 classes)
    y_train_subset = tf.keras.utils.to_categorical(y_train_subset, 10)
    y_valid_subset = tf.keras.utils.to_categorical(y_valid_subset, 10)
    y_test_subset = tf.keras.utils.to_categorical(y_test_subset, 10)

    mpth = 'model.weights.best.pretrain.hdf5'
    y_hat_test_name = 'y_hat_test_pretrain'
    y_hat_train_name = 'y_hat_train_pretrain'

    # set learning rate and exponential decay rate 
    opt = keras.optimizers.Adam(learning_rate=0.001, beta_1 =0.9)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    # Keep only the best checkpoint. No `monitor` is passed, so the callback's
    # default metric decides what "best" means (val_loss per the Keras docs).
    checkpointer = ModelCheckpoint(filepath= save_path + mpth, verbose = 1, save_best_only=True)
    model.fit(x_train_subset,
             y_train_subset,
             batch_size=64,
             epochs=epochs,
             validation_data=(x_valid_subset, y_valid_subset),
             callbacks=[checkpointer])

    # NOTE(review): these predictions are taken BEFORE the best checkpoint is
    # reloaded, i.e. from the model state at the end of fit(), while the accuracy
    # printed below uses the reloaded checkpoint -- confirm this is intended.
    y_hat_test = model.predict(x_test_subset)

    # Reload the best checkpoint and evaluate it on the test subset
    model.load_weights(save_path + mpth)
    score = model.evaluate(x_test_subset, y_test_subset, verbose=0)
    print('\n', 'Test accuracy:', score[1]) 

    # The "train" file stores the one-hot training targets, not model predictions
    np.save(save_path + y_hat_train_name + '.npy', y_train_subset)
    print(save_path + y_hat_train_name)

    np.save(save_path + y_hat_test_name + '.npy', y_hat_test)
    print(save_path + y_hat_test_name)

# Iterated Retraining Process

In [None]:
#############################################################
## Create a Shuffled y_train such that the training images ##
## and labels no longer match up                           ##
#############################################################
def shuffle_all(y_train):
    """Return a randomly reordered copy of ``y_train``; the input is left untouched.

    The reordering uses NumPy's global random state, so labels no longer line
    up with their images.
    """
    scrambled = np.array(y_train, copy=True)
    np.random.shuffle(scrambled)
    return scrambled

In [None]:
#############################################################
## Create a Shuffled y_train such that the training images ##
## and labels no longer match up ONLY for the unseen images##
#############################################################
def shuffle_some(x_train, y_train, END_IDX):
    """Randomize the labels of samples whose label is >= END_IDX, keeping the rest true.

    Samples with label < END_IDX keep their image/label pairing. The labels of
    all other samples are shuffled among themselves. The two groups are then
    concatenated and jointly permuted, so the returned arrays are reordered
    copies of the inputs.

    Returns
    -------
    tuple
        ``(x_train, y_train)`` after the partial label shuffle and final mixing.
    """
    # rows whose label is below END_IDX (the classes the model was pretrained on)
    seen_rows = np.nonzero(y_train < END_IDX)[0]

    # boolean row selector for everything else
    unseen = np.ones(len(y_train), dtype=bool)
    unseen[seen_rows] = False

    images_kept, labels_kept = x_train[seen_rows], y_train[seen_rows]
    images_rest, labels_rest = x_train[unseen], y_train[unseen]

    # break the image/label pairing for the unseen part only
    np.random.shuffle(labels_rest)

    images_all = np.concatenate((images_kept, images_rest), axis=0)
    labels_all = np.concatenate((labels_kept, labels_rest))

    # interleave true-labelled and randomized samples
    order = np.random.permutation(len(labels_all))
    return images_all[order], labels_all[order]

In [None]:
##############################################################
## This cell runs the iterated learning training procedure. ##
## USING RANDOMIZED LABELS.                                 ##
##############################################################
# Number of learning iterations
MAX_ITER = 25
# Number of epochs per training run (decrease this to learn less)
EPOCHS = 10

for END_IDX in range(1, 11):
    # Folder holding the pretrained weights for this END_IDX
    save_path = "/scratch/gpfs/eysu/low_shot_weights/" + str(END_IDX) + "/"
    # Folder for everything this run writes (checkpoints and softmax outputs);
    # created up front so a missing directory cannot abort the run after training
    run_dir = save_path + "shuffle_none/"
    os.makedirs(run_dir, exist_ok=True)

    x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()

    # Label shuffling is disabled for this variant (outputs go to "shuffle_none/").
    # To randomize labels, enable one of:
    # y_train = shuffle_all(y_train)
    # x_train, y_train = shuffle_some(x_train, y_train, END_IDX)

    for iteration in range(0,MAX_ITER):
        # If iteration is seed, train on original target vectors, else, train on y_hat from time t-1
        if iteration == 0:
            # One-hot encode the labels (10 classes)
            y_train = tf.keras.utils.to_categorical(y_train, 10)
            y_valid = tf.keras.utils.to_categorical(y_valid, 10)
            y_test = tf.keras.utils.to_categorical(y_test, 10)

            mpth = 'model.weights.best.hdf5'
            y_hat_test_name = 'y_hat_test_seed'
            y_hat_train_name = 'y_hat_train_seed'      
        else:
            # Key step: set new targets as y_hat
            y_train = y_hat    
            mpth = 'model.weights.best.' + 'iter' + str(iteration) + '.hdf5'
            y_hat_test_name = 'y_hat_test_' + 'iter' + str(iteration)
            y_hat_train_name = 'y_hat_train_' + 'iter' + str(iteration)

        # Define the model: a small CNN model
        model = tf.keras.Sequential()

        # Must define the input shape in the first layer of the neural network
        model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1))) 
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
        model.add(tf.keras.layers.Dropout(0.3))

        model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
        model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
        model.add(tf.keras.layers.Dropout(0.3))

        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(256, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(10, activation='softmax'))

        # Every iteration starts again from the pretrained (low-shot) weights
        model.load_weights(save_path + 'model.weights.best.pretrain.hdf5')

        # define optimization and energy parameters
        # default learning_rate = 0.001
        # default beta_1 = 0.9
        # reduce both significantly to slow down learning! *** BORROWING FROM INTERLEAVING PAPER ****
        opt = keras.optimizers.Adam(learning_rate=0.0001, beta_1 = 0.0001)
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

        # Save a checkpoint after every epoch (save_best_only=False), always to the same file
        checkpoint_path = run_dir + mpth
        checkpointer = ModelCheckpoint(filepath=checkpoint_path, verbose = 1, save_best_only=False)
        # Train the model
        model.fit(x_train,
                 y_train,
                 batch_size=64,
                 epochs=EPOCHS,
                 validation_data=(x_valid, y_valid),
                 callbacks=[checkpointer])

        # Softmax outputs of the freshly trained model
        y_hat = model.predict(x_train) #feed back serial reproduction targets
        y_hat_test = model.predict(x_test)

        # Reload the checkpoint from the path it was written to
        # (the original read save_path + mpth, which the callback never writes)
        model.load_weights(checkpoint_path)
        # Evaluate the model on test set
        score = model.evaluate(x_test, y_test, verbose=0)
        # Print test accuracy
        print('\n', 'Test accuracy:', score[1])

        # Save results for each iteration in the serial reproduction chain.
        # The "train" file holds the targets used in THIS iteration
        # (one-hot labels for the seed, previous y_hat afterwards).
        np.save(run_dir + y_hat_train_name + '.npy', y_train)
        print(run_dir + y_hat_train_name)

        np.save(run_dir + y_hat_test_name + '.npy', y_hat_test)
        print(run_dir + y_hat_test_name)



# Visualization 

In [None]:
########################
## Visualize N images ##
########################

from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')
import matplotlib.backends.backend_pdf

def visualize_softmax(image_idx):
    """Plot one training image next to its softmax outputs across iterations and append the page to the PDF.

    Reads the notebook globals ``x_train`` (images), ``y_hat_train_arr``
    (indexed as [image, class, iteration]), ``labels`` (x tick labels) and
    ``pdf`` (an open ``PdfPages`` object the figure is saved into).

    Parameters
    ----------
    image_idx : int
        Index of the training image to show.
    """
    figure = plt.figure(figsize=(40, 40))

    # plot image
    ax1 = figure.add_subplot(8, 8, 1, xticks=[], yticks=[])
    ax1.imshow(x_train[image_idx])
    ax1.set_title("Image")

    # plot softmax matrix: after the transpose, rows are iterations and columns are classes
    ax2 = figure.add_subplot(8, 8, 2)
    im2 = ax2.imshow(y_hat_train_arr[image_idx, :, :].T, cmap='Wistia')

    # narrow colorbar attached to the right edge of the matrix
    divider = make_axes_locatable(ax2)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    cbar = figure.colorbar(im2, cax=cax, orientation='vertical', ticks=[0, 1])
    cbar.ax.set_yticklabels(['0', '1'])

    ax2.set(xlabel='Classes', ylabel='Iterations', title='Softmax Outputs')
    ax2.set_xticks(ticks=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    ax2.set_xticklabels(labels)

    pdf.savefig(figure, bbox_inches='tight')
    plt.show()
    # This function is called in long loops; release the figure so they do not pile up in memory
    plt.close(figure)


In [None]:
############################################################
## Look at softmax output matrices for all sets of images ##
############################################################

# define constants and params
# define constants and params
MAX_ITER = 25
labels = ["0",  # index 0
              "1",  # index 1
              "2",  # index 2 
              "3",  # index 3 
              "4",  # index 4
              "5",  # index 5
              "6",  # index 6 
              "7",  # index 7 
              "8",  # index 8 
              "9"]  # index 9

# for every pretrained model, generate images for random, 
# semi random, and non random training processes
for END_IDX in range(10, 11):

    x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()
    # array holding the stored softmax vectors: [image, class, iteration];
    # it is completely overwritten for every case below
    y_hat_train_arr = np.zeros([y_train.shape[0], 10, MAX_ITER])
    
    for case in ["LR_adjusted/","shuffle_some/", "shuffle_none/"]:
        save_path = "/scratch/gpfs/eysu/low_shot_weights/" + str(END_IDX) + "/" + case
        
        # files of the LR_adjusted case carry an extra "LR_adjusted" file-name prefix
        edit_name = case == "LR_adjusted/"
        name_prefix = 'LR_adjusted' if edit_name else ''
            
        # store the softmax vector from every iteration of training into y_hat_train_arr
        for i in range(MAX_ITER):
            if i == 0:
                y_hat_train_name = name_prefix + 'y_hat_train_seed'
            else:
                y_hat_train_name = name_prefix + 'y_hat_train_' + 'iter' + str(i)

            # Load the saved train-set target/softmax array for this iteration
            yhtr = np.load(save_path + y_hat_train_name + '.npy')
            
            if i == 0:
                # column index of every nonzero entry of the seed array
                true_class_tr = np.nonzero(yhtr)[1]   
            y_hat_train_arr[:, :, i] = yhtr
            
        print(str(END_IDX) + ":" + case)
        save_image_path = "Outputs/" + case + str(END_IDX) + ".pdf"
        # The context manager closes the PDF even if one of the plots raises.
        # visualize_softmax reads the global name `pdf`.
        with matplotlib.backends.backend_pdf.PdfPages(save_image_path) as pdf:
            for j in range(100):
                visualize_softmax(np.random.randint(0, y_hat_train_arr.shape[0]))

# More Visualization

In [None]:
##############################
## Compare Softmax Matrices ##
##############################

from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')
import matplotlib.backends.backend_pdf

def _plot_softmax_panel(figure, slot, softmax_matrix):
    """Draw one softmax heat-map (rows = iterations, columns = classes) in subplot ``slot`` of the 8x8 grid."""
    ax = figure.add_subplot(8, 8, slot)
    im = ax.imshow(softmax_matrix, cmap='Wistia')

    # narrow colorbar attached to the right edge of the matrix
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    cbar = figure.colorbar(im, cax=cax, orientation='vertical', ticks=[0, 1])
    cbar.ax.set_yticklabels(['0', '1'])

    ax.set(xlabel='Classes', ylabel='Iterations', title='Softmax Outputs')
    ax.set_xticks(ticks=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    ax.set_xticklabels(labels)


def visualize_all_softmax(image_idx):
    """Plot one training image followed by its softmax matrices from the three training variants.

    Reads the notebook globals ``x_train``, ``labels``, ``pdf`` (an open
    ``PdfPages``) and the arrays ``y_hat_train_arr_LR``,
    ``y_hat_train_arr_some`` and ``y_hat_train_arr_none`` (each indexed as
    [image, class, iteration]). Panels appear left to right in that order.

    Parameters
    ----------
    image_idx : int
        Index of the training image to show.
    """
    figure = plt.figure(figsize=(40, 40))

    # plot image
    ax_img = figure.add_subplot(8, 8, 1, xticks=[], yticks=[])
    ax_img.imshow(x_train[image_idx])
    ax_img.set_title("Image")

    # one heat-map per training variant, in subplot slots 2, 3 and 4
    variants = (y_hat_train_arr_LR, y_hat_train_arr_some, y_hat_train_arr_none)
    for slot, softmax_arr in enumerate(variants, start=2):
        _plot_softmax_panel(figure, slot, softmax_arr[image_idx, :, :].T)

    pdf.savefig(figure, bbox_inches='tight')
    plt.show()
    # Called hundreds of times per PDF; release the figure so they do not pile up in memory
    plt.close(figure)


In [None]:
################################################
## Compare all softmax matrices for one image ##
################################################

# define constants and params
# define constants and params
MAX_ITER = 25
labels = ["0",  # index 0
              "1",  # index 1
              "2",  # index 2 
              "3",  # index 3 
              "4",  # index 4
              "5",  # index 5
              "6",  # index 6 
              "7",  # index 7 
              "8",  # index 8 
              "9"]  # index 9

# for every pretrained model, generate images for random, 
# semi random, and non random training processes
for END_IDX in range(1, 11):
    save_path = "/scratch/gpfs/eysu/low_shot_weights/" + str(END_IDX) 
    
    x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()
    # one array per training variant: [image, class, iteration]
    y_hat_train_arr_LR = np.zeros([y_train.shape[0], 10, MAX_ITER])
    y_hat_train_arr_some = np.zeros([y_train.shape[0], 10, MAX_ITER])
    y_hat_train_arr_none = np.zeros([y_train.shape[0], 10, MAX_ITER])
    
    # store the softmax vector from every iteration of training into the three arrays
    for i in range(MAX_ITER):
        
        if i == 0:
            y_hat_train_name = 'y_hat_train_seed'

        else:
            y_hat_train_name = 'y_hat_train_' + 'iter' + str(i)

        # Load the saved train-set arrays; LR_adjusted files carry an extra file-name prefix
        yhtr_LR = np.load(save_path + "/LR_adjusted/LR_adjusted" + y_hat_train_name + '.npy')
        yhtr_some = np.load(save_path + "/shuffle_some/" + y_hat_train_name + '.npy')
        yhtr_none = np.load(save_path + "/shuffle_none/" + y_hat_train_name + '.npy')

        y_hat_train_arr_LR[:, :, i] = yhtr_LR
        y_hat_train_arr_some[:, :, i] = yhtr_some
        y_hat_train_arr_none[:, :, i] = yhtr_none

    save_image_path = "Outputs/compare_all_methods/" + str(END_IDX) + ".pdf"
    # The context manager closes the PDF even if one of the plots raises.
    # visualize_all_softmax reads the global name `pdf`.
    with matplotlib.backends.backend_pdf.PdfPages(save_image_path) as pdf:
        for j in range(200):
            # Sample from an array this cell actually fills (the original used
            # `y_hat_train_arr`, which is only defined by a different cell).
            visualize_all_softmax(np.random.randint(0, y_hat_train_arr_LR.shape[0]))

# TESTING

In [None]:
##########################
## Load in MNIST Digits ##
##########################

# Open the MNIST archive and list the arrays stored in it
all_data = np.load("/scratch/gpfs/eysu/src_data/mnist.npz")
print(all_data.files)

# Pull out the image and label arrays of both splits
x_test, x_train = all_data['x_test'], all_data['x_train']
y_train, y_test = all_data['y_train'], all_data['y_test']

# Report each array's shape, one per line
print(x_test.shape, x_train.shape, y_train.shape, y_test.shape, sep="\n")

# examine
# print(x_train)
# print(x_test)
# print(y_train)
# print(y_test)

In [None]:
###############################
## Partition and resize data ##
###############################

# Class names "0" .. "9", used as tick labels by the plotting cells
labels = [str(digit) for digit in range(10)]

# Scale pixel values by 1/255 as float32.
# NOTE: this cell rebinds x_train / y_train in place, so running it twice rescales and re-splits the data.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# First 5000 training samples become the validation split; the rest stay in train
x_valid, x_train = x_train[:5000], x_train[5000:]
y_valid, y_train = np.array(y_train[:5000]).squeeze(), y_train[5000:]

# Append a trailing channel axis: (28, 28) -> (28, 28, 1)
w, h = 28, 28
x_train = x_train.reshape(len(x_train), w, h, 1)
x_valid = x_valid.reshape(len(x_valid), w, h, 1)
x_test = x_test.reshape(len(x_test), w, h, 1)

# Per split: label shape, label histogram, raw labels, image shape (one item per line)
print(y_train.shape, np.unique(y_train, return_counts=True), y_train, x_train.shape, sep="\n")

print(y_valid.shape, np.unique(y_valid, return_counts=True), y_valid, x_valid.shape, sep="\n")

print(y_test.shape, np.unique(y_test, return_counts=True), y_test, x_test.shape, sep="\n")


In [None]:
# Take subset of data
# Keep only the samples whose label is strictly below END_IDX.
# NOTE(review): presumably the labels are the digits 0-9, in which case END_IDX = 0
# selects nothing and every subset below is empty -- confirm this value is intended.
END_IDX = 0

idx_train = np.nonzero(y_train < END_IDX)
x_train_subset, y_train_subset = x_train[idx_train], y_train[idx_train]

idx_valid = np.nonzero(y_valid < END_IDX)
x_valid_subset, y_valid_subset = x_valid[idx_valid], y_valid[idx_valid]

idx_test = np.nonzero(y_test < END_IDX)
x_test_subset, y_test_subset = x_test[idx_test], y_test[idx_test]

# Per subset: label shape, label histogram, raw labels, image shape (one item per line)
print(y_train_subset.shape, np.unique(y_train_subset, return_counts=True), y_train_subset, x_train_subset.shape, sep="\n")

print(y_valid_subset.shape, np.unique(y_valid_subset, return_counts=True), y_valid_subset, x_valid_subset.shape, sep="\n")

print(y_test_subset.shape, np.unique(y_test_subset, return_counts=True), y_test_subset, x_test_subset.shape, sep="\n")


In [None]:
# Define the model: a small CNN model
# Small CNN, assembled by handing Sequential the complete layer stack
model = tf.keras.Sequential([
    # The first layer has to declare the input shape
    tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Epochs per training run
EPOCHS = 10
# Output folder for weights and saved arrays
save_path = "/scratch/gpfs/eysu/low_shot_weights/"

# One-hot encode all three label subsets (10 classes)
y_train_subset, y_valid_subset, y_test_subset = (
    tf.keras.utils.to_categorical(label_split, 10)
    for label_split in (y_train_subset, y_valid_subset, y_test_subset)
)

# Which classes actually occur in the training subset
print(np.unique(np.nonzero(y_train_subset == 1)[1]))

mpth = 'model.weights.best.pretrain.hdf5'
y_hat_test_name = 'y_hat_test_pretrain'
y_hat_train_name = 'y_hat_train_pretrain'

# Adam with learning rate 0.001 and beta_1 0.9
opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Only the best checkpoint is kept on disk
checkpointer = ModelCheckpoint(filepath=save_path + mpth, verbose=1, save_best_only=True)
model.fit(
    x=x_train_subset,
    y=y_train_subset,
    batch_size=64,
    epochs=EPOCHS,
    validation_data=(x_valid_subset, y_valid_subset),
    callbacks=[checkpointer],
)

# Test-subset predictions, taken before the checkpoint is reloaded
y_hat_test = model.predict(x_test_subset)

# Reload the saved checkpoint, then score it on the test subset
model.load_weights(save_path + mpth)
score = model.evaluate(x_test_subset, y_test_subset, verbose=0)
print('\n', 'Test accuracy:', score[1]) 

# Persist the one-hot training targets and the test predictions
np.save(f"{save_path}{y_hat_train_name}.npy", y_train_subset)
print(save_path + y_hat_train_name)

np.save(f"{save_path}{y_hat_test_name}.npy", y_hat_test)
print(save_path + y_hat_test_name)

# Begin the Iterated Learning Retraining Process

In [None]:
#############################################################
## Create a Shuffled y_train such that the training images ##
## and labels no longer match up                           ##
#############################################################
# Training labels without the first 5000 (validation) entries
y_train = all_data['y_train'][5000:]

# Shuffle a copy so the original label order stays available for comparison
y_train_shuffle = y_train.copy()
np.random.shuffle(y_train_shuffle)

# Original labels, shuffled labels, and the shuffled array's shape (one item per line)
print(y_train, y_train_shuffle, y_train_shuffle.shape, sep="\n")

In [None]:
#############################################################
## Create a Shuffled y_train such that the training images ##
## and labels no longer match up                           ##
#############################################################
x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()

END_IDX = 9

# row indices of the samples whose label is below END_IDX; these keep their true labels
idx_train = np.nonzero(y_train < END_IDX)[0]

print(x_train.shape, y_train.shape, sep="\n")

# split into the part that stays true and the part whose labels get randomized
x_train_true, y_train_true = x_train[idx_train], y_train[idx_train]
x_train_shuffle, y_train_shuffle = (
    np.delete(full_array, idx_train, axis=0) for full_array in (x_train, y_train)
)

print(x_train_true.shape, y_train_true.shape, x_train_shuffle.shape, y_train_shuffle.shape, sep="\n")

# labels of the remaining samples before and after shuffling
print(y_train_shuffle)
np.random.shuffle(y_train_shuffle)
print(y_train_shuffle)

# stitch both parts back together (true-labelled samples first)
x_train = np.concatenate([x_train_true, x_train_shuffle], axis=0)
y_train = np.concatenate([y_train_true, y_train_shuffle])

print(x_train.shape, y_train.shape, y_train, sep="\n")

# jointly permute images and labels so both parts are interleaved
mix_order = np.random.permutation(y_train.shape[0])
x_train, y_train = x_train[mix_order], y_train[mix_order]

print(x_train.shape, y_train.shape, y_train, sep="\n")





# print(idx_train)
# print(y_train_shuffle[idx_train])
# np.random.shuffle(y_train_shuffle[idx_train])
# print(y_train_shuffle[idx_train])

# y_train_shuffle = np.copy(y_train)
# np.random.shuffle(y_train_shuffle[idx_train])

# print(np.unique(y_train_shuffle - y_train))

# idx_valid = np.where(y_valid < END_IDX)
#     y_valid_subset = y_valid[idx_valid]
#     x_valid_subset = x_valid[idx_valid]

# idx_test = np.where(y_test < END_IDX)
#     y_test_subset = y_test[idx_test]
#     x_test_subset = x_test[idx_test]


# y_train = all_data['y_train'][5000:]
# y_train_shuffle = np.copy(y_train)
# np.random.shuffle(y_train_shuffle)

# print(y_train)
# print(y_train_shuffle)

# Ideas
- visualize the softmax matrices of 200 random images. One hot seed vectors should be random. Does the model perform better at categorizing each time? How will things converge?
- quantify the final categories predicted. What is the distribution between the 10 classes?
    - interesting thing to try might be line graphs showing the fraction of images in each class taken every 5 iterations. Hopefully will see changes in the lines over time?
- What if we don't provide the number of classes as an input? This is done through the .to_categorical step. Look into whether there is a way to let this be determined naturally

# Initial Visualization

In [None]:
#######################################################
## Look at softmax output matrices for random images ##
#######################################################
# Number of learning iterations
# Number of learning iterations stored on disk
MAX_ITER = 25
save_path = "/scratch/gpfs/eysu/low_shot_weights/10/"

x_train, x_valid, x_test, y_train, y_valid, y_test = loadData()

# One slice per iteration: [image, class, iteration]
y_hat_train_arr = np.zeros((y_train.shape[0], 10, MAX_ITER))
for i in range(MAX_ITER):
    # iteration 0 is stored as the "seed" file, later ones as "iter<i>"
    y_hat_train_name = 'LR_adjustedy_hat_train_' + ('seed' if i == 0 else 'iter' + str(i))

    # Load the saved train-set array for this iteration
    yhtr = np.load(save_path + y_hat_train_name + '.npy')

    if i == 0:
        # Column index of every nonzero entry of the seed array
        # (presumably the seed array is one-hot, making this the seed class per image -- confirm)
        true_class_tr = np.nonzero(yhtr)[1]
    y_hat_train_arr[..., i] = yhtr

print(y_hat_train_arr.shape)
# (55000, 10, 25)

In [None]:
# Class names "0" .. "9"; position in the list equals the class index
labels = [str(class_index) for class_index in range(10)]

In [None]:
########################
## Visualize N images ##
########################

from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')
import matplotlib.backends.backend_pdf

def visualize_softmax(image_idx):
    """Plot one training image next to its softmax outputs across iterations and append the page to the PDF.

    NOTE: a function with this name is also defined in an earlier cell of this
    notebook; whichever cell ran last is the one in effect.

    Reads the notebook globals ``x_train`` (images), ``y_hat_train_arr``
    (indexed as [image, class, iteration]), ``labels`` (x tick labels) and
    ``pdf`` (an open ``PdfPages`` object the figure is saved into).

    Parameters
    ----------
    image_idx : int
        Index of the training image to show.
    """
    figure = plt.figure(figsize=(40, 40))

    # plot image
    ax1 = figure.add_subplot(8, 8, 1, xticks=[], yticks=[])
    ax1.imshow(x_train[image_idx])
    ax1.set_title("Image")

    # plot softmax matrix: after the transpose, rows are iterations and columns are classes
    ax2 = figure.add_subplot(8, 8, 2)
    im2 = ax2.imshow(y_hat_train_arr[image_idx, :, :].T, cmap='Wistia')

    # narrow colorbar attached to the right edge of the matrix
    divider = make_axes_locatable(ax2)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    cbar = figure.colorbar(im2, cax=cax, orientation='vertical', ticks=[0, 1])
    cbar.ax.set_yticklabels(['0', '1'])

    ax2.set(xlabel='Classes', ylabel='Iterations', title='Softmax Outputs')
    ax2.set_xticks(ticks=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    ax2.set_xticklabels(labels)

    pdf.savefig(figure, bbox_inches='tight')
    plt.show()
    # This function is called in long loops; release the figure so they do not pile up in memory
    plt.close(figure)
        
save_image_path = "Outputs/"
# Write 200 randomly chosen training images to one PDF. The context manager
# closes the file even if a plot raises; visualize_softmax reads the global `pdf`.
with matplotlib.backends.backend_pdf.PdfPages(save_image_path + "200_lo_shot_10_class_LR_adjusted.pdf") as pdf:
    for i in range(200):
        visualize_softmax(np.random.randint(0, y_hat_train_arr.shape[0]))

# Examine output softmax predictions

In [None]:
# Distinct top-scoring classes over all training images at iteration index 24
print(np.unique(y_hat_train_arr[:, :, 24].argmax(axis=1)))

# this shows us that the digit predicted with the highest probability is always 1 for every image in the training set
# not spread equally across all classes -> this is just model bias

In [None]:
### OK THIS WORKS


# Define the model: a small CNN model (could probably be done outside loop)
# Small CNN, assembled by handing Sequential the complete layer stack
model = tf.keras.Sequential([
    # The first layer has to declare the input shape
    tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(28,28,1)),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=2),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Adam at learning rate 0.001 with a very small beta_1
opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.0001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Keep only the best checkpoint, written to "tester"
checkpointer = ModelCheckpoint(filepath="tester", verbose=1, save_best_only=True)

# Train / validate / test on the label subsets prepared in an earlier cell
x_train, x_valid, x_test = x_train_subset, x_valid_subset, x_test_subset

# One-hot encode the subset labels (10 classes)
y_train, y_valid, y_test = (
    tf.keras.utils.to_categorical(label_split, 10)
    for label_split in (y_train_subset, y_valid_subset, y_test_subset)
)

model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=EPOCHS,
    validation_data=(x_valid, y_valid),
    callbacks=[checkpointer],
)

# Softmax outputs of the model as it stands after fit()
y_hat = model.predict(x_train) #feed back serial reproduction targets
y_hat_test = model.predict(x_test)

# Reload the saved checkpoint and score it on the test subset
model.load_weights("tester")
score = model.evaluate(x_test, y_test, verbose=0)
print('\n', 'Test accuracy:', score[1])

#     # Save results for each iteration in the serial reproduction chain
#     np.save(save_path + y_hat_train_name + '.npy', y_train)
#     print(save_path + y_hat_train_name)

#     np.save(save_path + y_hat_test_name + '.npy', y_hat_test)
#     print(save_path + y_hat_test_name)

In [None]:
# Distinct stored values for class 1 at iteration index 24, over all training images
print(np.unique(y_hat_train_arr[..., 1, 24]))

# confirms that the predicted probability that the input image is of class 1 is always the same across all images

In [None]:
# For each of the 10 classes, the distinct stored values at iteration index 24 (one class per line)
print(*(np.unique(y_hat_train_arr[:, class_index, 24]) for class_index in range(10)), sep="\n")

# confirms that the predicted probabilities for every class is the same regardless of the input images
# the randomization of the labels prevents the model from ever converging in its classification predictions