# Mock Supervised Training Method
The same procedure, except adapted for the CIFAR-10 dataset.

In [None]:
###############
## Libraries ##
###############

import tensorflow as tf
import matplotlib.pyplot as plt 
import numpy as np
from tensorflow.keras import datasets, layers, models, losses
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from keras import backend as K
from tensorflow import keras

# Training the Model

In [None]:
######################################
## Import CIFAR10 data from Scratch ##
######################################
import pickle

# unpickle the binary files
def unpickle(file):
    """Load and return the object stored in the pickle file at ``file``.

    Args:
        file: Path of the pickle file; it is opened in binary read mode.

    Returns:
        The object reconstructed by ``pickle.load``. The file is decoded
        with ``encoding='bytes'``, so 8-bit strings pickled by Python 2 come
        back as ``bytes`` objects.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only use this
    # on files that come from a trusted source.
    with open(file, 'rb') as fo:
        # Returned directly instead of binding a local called `dict`, which
        # used to shadow the builtin.
        return pickle.load(fo, encoding='bytes')

# Read the five training batches, then the metadata file and the test batch
batch1, batch2, batch3, batch4, batch5 = (
    unpickle("/scratch/gpfs/eysu/src_data/cifar-10-batches-py/data_batch_" + str(k))
    for k in range(1, 6)
)
meta = unpickle("/scratch/gpfs/eysu/src_data/cifar-10-batches-py/batches.meta")
test = unpickle("/scratch/gpfs/eysu/src_data/cifar-10-batches-py/test_batch")

# Pull the label list and the image data out of every training batch
train_batches = (batch1, batch2, batch3, batch4, batch5)
y_train1, y_train2, y_train3, y_train4, y_train5 = (
    batch[b'labels'] for batch in train_batches
)
x_train1, x_train2, x_train3, x_train4, x_train5 = (
    batch[b'data'] for batch in train_batches
)

# Join the per-batch pieces into one label array and one image array
y_train = np.concatenate([y_train1, y_train2, y_train3, y_train4, y_train5])
x_train = np.concatenate(
    [x_train1, x_train2, x_train3, x_train4, x_train5],
    axis=0,
)

# The test batch is used as-is
y_test = test[b'labels']
x_test = test[b'data']

In [None]:
#################################################
## Preprocess data by reshaping and separating ##
#################################################
# Class names; a name's position in the list is its integer class id
labels = [
    'airplane',    # index 0
    'automobile',  # index 1
    'bird',        # index 2
    'cat',         # index 3
    'deer',        # index 4
    'dog',         # index 5
    'frog',        # index 6
    'horse',       # index 7
    'ship',        # index 8
    'truck',       # index 9
]

# Hold out the first 5,000 training examples for validation; the remaining
# 45,000 stay in the training set
x_valid = x_train[:5000]
x_train = x_train[5000:]
y_valid = y_train[:5000]
y_train = y_train[5000:]

# Each flat row becomes (3, 32, 32) and is then transposed to (32, 32, 3) so
# the arrays match the layout of cifar10.load_data; values are divided by 255
x_train = x_train.reshape(45000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float32')
x_train /= 255
# y_train is left as integer class ids in this cell (no one-hot encoding)

x_valid = x_valid.reshape(5000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float32')
x_valid /= 255
y_valid = tf.keras.utils.to_categorical(y_valid, 10)

x_test = x_test.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float32')
x_test /= 255
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Report the shape of every split (printed only, nothing is asserted)
for title, images, targets in (
    ("TRAINING DATA", x_train, y_train),
    ("VALIDATION DATA", x_valid, y_valid),
    ("TESTING DATA", x_test, y_test),
):
    print(title)
    print(images.shape)
    print(targets.shape)


In [None]:
# Display a single training image together with its label

# Any index from 0 to 44,999 can be used here
img_index = 35
label_index = y_train[img_index]
# Print the numeric label followed by its class name, e.g. "y = 3 (cat)"
print(f"y = {label_index} ({labels[label_index]})")
plt.imshow(x_train[img_index])
plt.show()

In [None]:
###################################################
## Shuffle y_train such that the training images ##
## and labels no longer match up                 ##
###################################################

# Shuffle a copy of the labels in place; y_train itself is not modified
y_train_shuffle = np.array(y_train)
np.random.shuffle(y_train_shuffle)
# Print the original labels, then the shuffled ones, for comparison
for label_array in (y_train, y_train_shuffle):
    print(label_array)

In [None]:
##############################################################
## This cell runs the iterated learning training procedure. ##
##############################################################

import os

# Number of learning iterations
MAX_ITER = 25
# Number of epochs per training run
EPOCHS = 10
# Where to store output model weights and softmax predictions
save_path = "/scratch/gpfs/eysu/CIFAR10_results/mock_supervised/"
#save_path = "/scratch/gpfs/eysu/mock_supervised_weights/LR_adjusted/"

# np.save does not create missing directories, so make sure the target exists
os.makedirs(save_path, exist_ok=True)


def _build_model():
    """Return a newly initialised, compiled small CNN for 32x32x3 inputs.

    Two Conv2D/MaxPooling2D/Dropout stages are followed by a 256-unit dense
    layer and a 10-way softmax. The model is compiled with Adam
    (learning rate 0.0001; the Adam default is 0.001), categorical
    cross-entropy loss and the accuracy metric.
    """
    model = tf.keras.Sequential([
        # The input shape must be declared on the first layer
        tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same',
                               activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=2),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same',
                               activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=2),
        tf.keras.layers.Dropout(0.3),

        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


for iteration in range(MAX_ITER):
    # A new model is created on every pass; clear Keras' global state first so
    # the previous models do not accumulate in memory over the iterations.
    tf.keras.backend.clear_session()

    # Seed iteration trains on the shuffled labels; every later iteration
    # trains on the predictions (y_hat) produced by the previous one.
    if iteration == 0:
        # One-hot encode the randomized labels and use them as y_train
        y_train = tf.keras.utils.to_categorical(y_train_shuffle, 10)
        mpth = 'model.weights.best.hdf5'
        y_hat_test_name = 'y_hat_test_seed'
        y_hat_train_name = 'y_hat_train_seed'
    else:
        # Key step: set new targets as y_hat
        y_train = y_hat
        mpth = 'model.weights.best.' + 'iter' + str(iteration) + '.hdf5'
        y_hat_test_name = 'y_hat_test_' + 'iter' + str(iteration)
        y_hat_train_name = 'y_hat_train_' + 'iter' + str(iteration)

    model = _build_model()

    # Checkpoint only when the monitored quantity improves. No `monitor` is
    # passed, so the callback's default applies (val_loss per the Keras docs,
    # i.e. not validation accuracy).
    checkpointer = ModelCheckpoint(filepath=save_path + mpth, verbose=1, save_best_only=True)

    # Train the model
    model.fit(x_train,
              y_train,
              batch_size=64,
              epochs=EPOCHS,
              validation_data=(x_valid, y_valid),
              callbacks=[checkpointer])

    # Predictions are taken with the weights as they stand after the last
    # epoch; y_hat becomes the next iteration's training targets.
    # NOTE(review): the checkpoint is only reloaded *after* these predict
    # calls, so y_hat / y_hat_test and the test accuracy below may come from
    # different weights. Confirm whether the reload was meant to come first.
    y_hat = model.predict(x_train)
    y_hat_test = model.predict(x_test)

    # Restore the checkpointed weights and evaluate them on the test set
    model.load_weights(save_path + mpth)
    score = model.evaluate(x_test, y_test, verbose=0)
    # Print test accuracy
    print('\n', 'Test accuracy:', score[1])

    # Save results for each iteration in the serial reproduction chain.
    # The "train" file stores the targets this iteration was trained on
    # (y_train): the one-hot seed labels for iteration 0, and the previous
    # iteration's predictions afterwards.
    np.save(save_path + y_hat_train_name + '.npy', y_train)
    print(save_path + y_hat_train_name)

    np.save(save_path + y_hat_test_name + '.npy', y_hat_test)
    print(save_path + y_hat_test_name)



# Ideas
- visualize the softmax matrices of 200 random images. One hot seed vectors should be random. Does the model perform better at categorizing each time? How will things converge?
- quantify the final categories predicted. What is the distribution between the 10 classes?
    - interesting thing to try might be line graphs showing the fraction of images in each class taken every 5 iterations. Hopefully will see changes in the lines over time?
- What if we don't provide the number of classes as an input? It is currently fixed through the `.to_categorical` step. Look into whether there is a way to let it be determined naturally.

# Initial Visualization

In [None]:
#######################################################
## Look at softmax output matrices for random images ##
#######################################################
# Number of learning iterations
MAX_ITER = 25
save_path = "/scratch/gpfs/eysu/CIFAR10_results/mock_supervised/"

# One (examples x classes) slice per iteration, stacked along the last axis
y_hat_train_arr = np.zeros((y_train.shape[0], len(labels), MAX_ITER))
for i in range(MAX_ITER):
    # Iteration 0 was saved under the "seed" name, later ones as "iter<i>"
    y_hat_train_name = 'y_hat_train_seed' if i == 0 else 'y_hat_train_' + 'iter' + str(i)

    # Load the training-set array saved for this iteration
    yhtr = np.load(save_path + y_hat_train_name + '.npy')

    if i == 0:
        # The seed file holds the one-hot seed vectors; the column of each
        # non-zero entry is that image's seed label. Recall these labels were
        # randomized and do not correspond to the image's class in the dataset.
        true_class_tr = np.nonzero(yhtr)[1]

    y_hat_train_arr[..., i] = yhtr

print(y_hat_train_arr.shape)
# Shape is (y_train.shape[0], len(labels), MAX_ITER), i.e. (45000, 10, 25)
# when y_train holds the 45,000 training examples

In [None]:
# Print the distinct values in each class column at iteration index 24;
# a single value per class means every image has the same final vector
for class_idx in range(10):
    print(np.unique(y_hat_train_arr[:, class_idx, 24]))

In [None]:
###################################################################
## Helper visualization function to print image/softmax matrices ##
###################################################################

from mpl_toolkits.axes_grid1 import make_axes_locatable
import warnings
warnings.filterwarnings('ignore')

def visualize_softmax(image_idx):
    """Plot one training image next to its per-iteration output matrix.

    Draws ``x_train[image_idx]`` and a heat map of
    ``y_hat_train_arr[image_idx]`` transposed so that rows are iterations and
    columns are classes. The figure is appended to the module-level ``pdf``
    object with ``pdf.savefig``, shown, and then closed.

    Args:
        image_idx: Index into ``x_train``, ``y_train`` and the first axis of
            ``y_hat_train_arr``.

    Notes:
        Uses the module-level names ``x_train``, ``y_train``, ``labels``,
        ``y_hat_train_arr`` and ``pdf``; all must exist before the call.
        The title lookup ``labels[y_train[image_idx]]`` indexes a list, so
        ``y_train`` has to hold integer class ids at that point.
    """
    figure = plt.figure(figsize=(40, 40))
    try:
        # Left panel: the image itself, titled with its class name
        ax1 = figure.add_subplot(8, 8, 1, xticks=[], yticks=[])
        ax1.imshow(x_train[image_idx])
        ax1.set_title("Image: " + str(labels[y_train[image_idx]]))

        # Right panel: class values across iterations as a heat map
        ax2 = figure.add_subplot(8, 8, 2)
        im2 = ax2.imshow(y_hat_train_arr[image_idx, :, :].T, cmap='Wistia')

        # Attach a narrow colour bar to the right edge of the heat map
        divider = make_axes_locatable(ax2)
        cax = divider.append_axes('right', size='5%', pad=0.05)
        cbar = figure.colorbar(im2, cax=cax, orientation='vertical', ticks=[0, 1])
        cbar.ax.set_yticklabels(['0', '1'])

        ax2.set(xlabel='Classes', ylabel='Iterations', title='Softmax Outputs')
        # One tick per class so the tick count matches the label list
        ax2.set_xticks(ticks=list(range(len(labels))))
        ax2.set_xticklabels(labels, rotation="vertical")

        pdf.savefig(figure, bbox_inches='tight')
        plt.show()
    finally:
        # This function is called many times in a row; without an explicit
        # close every 40x40 figure would stay registered with pyplot.
        plt.close(figure)
        


In [None]:
import warnings
warnings.filterwarnings('ignore')
import matplotlib.backends.backend_pdf

# Render 200 randomly chosen training examples into one multi-page PDF
# (the indices are drawn at random; no ranking is applied)

save_image_path = "Outputs/"
# `pdf` has to be a module-level name because visualize_softmax() writes to
# it. The with-block closes the file even if a plot raises part-way through.
with matplotlib.backends.backend_pdf.PdfPages(
    save_image_path + "200_mock_unsupervised_softmax_outputs.pdf"
) as pdf:
    for i in range(200):
        visualize_softmax(np.random.randint(0, y_hat_train_arr.shape[0]))

# Examine output softmax predictions

In [None]:
# Distinct arg-max classes over all training rows at iteration index 24
final_iter = y_hat_train_arr[:, :, 24]
print(np.unique(final_iter.argmax(axis=1)))

# Author's observation: the class predicted with the highest probability is
# always 1 for every image in the training set. It is not spread equally
# across all classes -> this is just model bias

In [None]:
# Distinct values in the class-1 column at iteration index 24
class1_final = y_hat_train_arr[:, 1, 24]
print(np.unique(class1_final))

# Author's observation: confirms that the predicted probability of class 1
# is the same across all images

In [None]:
# Distinct values of every class column at iteration index 24
for class_idx in range(10):
    print(np.unique(y_hat_train_arr[:, class_idx, 24]))

# Author's observation: confirms that the predicted probability for every
# class is the same regardless of the input image. The randomization of the
# labels prevents the model from ever converging in its classification
# predictions