In [1]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
import tensorflow.keras as keras
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
import tensorflow as tf
from keras.utils import np_utils
from keras.models import load_model
from keras.datasets import cifar10
from keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt

from simple_contrastive_loss import contrastive_loss

# Load CIFAR10 Dataset

In [2]:
##################################################
## Load, partition, and preprocess CIFAR10 Data ##
##################################################
def loadData(data_dir="/scratch/gpfs/eysu/src_data/cifar-10-batches-py", n_valid=1000):
    """Load the pickled CIFAR-10 python batches and return preprocessed splits.

    The five training batches are concatenated; the first ``n_valid`` rows
    become the validation set and the rest the training set. Every image
    array is reshaped from flat rows to (N, 32, 32, 3) float32 and passed
    through ``tf.keras.applications.resnet50.preprocess_input``.

    Args:
        data_dir: Directory holding ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``.
        n_valid: Number of leading training rows held out for validation.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test, labels)``.
        The ``y_*`` arrays hold integer class ids (not one-hot); ``labels`` is
        the list of class names indexed by class id.

    Raises:
        ValueError: If ``n_valid`` is negative or exceeds the number of
            training rows.
    """
    import os
    import pickle

    def unpickle(file):
        # NOTE: pickle.load can execute arbitrary code; only point data_dir
        # at batch files from a trusted source.
        with open(file, 'rb') as fo:
            return pickle.load(fo, encoding='bytes')

    def to_images(flat):
        # Each row is reshaped as (3, 32, 32) and moved to channels-last.
        # -1 lets the row count follow the data instead of a hard-coded size.
        images = np.asarray(flat).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        return images.astype('float32')

    labels = ['airplane',  # index 0
              'automobile',  # index 1
              'bird',  # index 2
              'cat',  # index 3
              'deer',  # index 4
              'dog',  # index 5
              'frog',  # index 6
              'horse',  # index 7
              'ship',  # index 8
              'truck']  # index 9

    # read the five training batches and the test batch
    train_batches = [unpickle(os.path.join(data_dir, "data_batch_%d" % i))
                     for i in range(1, 6)]
    test = unpickle(os.path.join(data_dir, "test_batch"))

    # concatenate into big training arrays
    x_all = np.concatenate([batch[b'data'] for batch in train_batches], axis=0)
    y_all = np.concatenate([batch[b'labels'] for batch in train_batches])

    if not 0 <= n_valid <= x_all.shape[0]:
        raise ValueError(
            "n_valid must be between 0 and %d, got %r" % (x_all.shape[0], n_valid))

    # First n_valid rows -> validation, remainder -> train
    # (with the defaults: 1,000 validation / 49,000 train).
    x_valid, x_train = to_images(x_all[:n_valid]), to_images(x_all[n_valid:])
    y_valid, y_train = np.array(y_all[:n_valid]), np.array(y_all[n_valid:])

    x_test = to_images(test[b'data'])
    y_test = np.array(test[b'labels'])

    # Pixel values are deliberately left in [0, 255]: resnet50.preprocess_input
    # converts RGB -> BGR and subtracts the ImageNet channel means, which are
    # expressed on the 0-255 scale. Rescaling to [0, 1] first would leave every
    # pixel at roughly minus the channel mean.
    preprocess = tf.keras.applications.resnet50.preprocess_input
    x_train = preprocess(x_train)
    x_valid = preprocess(x_valid)
    x_test = preprocess(x_test)

    return x_train, x_valid, x_test, y_train, y_valid, y_test, labels

In [3]:
# Load every split once up front. Labels come back as integer class ids;
# the training cells further down call loadData() again and one-hot encode
# their own copies.
x_train, x_valid, x_test, y_train, y_valid, y_test, labels = loadData()


# Build Model with Custom Contrastive Loss Function

In [4]:
#####################################################
## Load weights for ResNet50 & add classifier head ##
#####################################################
def build_base_model(sims, bsz,
                     weights_path="~/scratch/gpfs/eysu/SoftCL/models/ResNet50_weights"):
    """Stack a classifier head on a saved ResNet50 base and compile it.

    The model upsamples 32x32 inputs by 8x, runs them through the saved
    convolutional base, and ends in a 10-way softmax. It is compiled with
    ``contrastive_loss`` and ``run_eagerly=True``.

    Args:
        sims: Similarity data forwarded unchanged to ``contrastive_loss``.
        bsz: Batch size forwarded unchanged to ``contrastive_loss``.
        weights_path: Location of the saved ResNet50 model. A leading ``~``
            is expanded to the user's home directory.

    Returns:
        The compiled ``Sequential`` model.
    """
    import os

    # load_model does not expand "~" itself, so resolve it here.
    # NOTE(review): the data paths in this notebook live under
    # /scratch/gpfs/eysu (no "~") - confirm the default location is intended.
    conv_base = tf.keras.models.load_model(os.path.expanduser(weights_path))

    # Keep a handle on the 64-unit layer: it is what the loss receives as the
    # embedding layer. A direct reference survives edits to the head, unlike a
    # positional index into model.layers.
    embed_layer = layers.Dense(64, activation='relu')

    stack = [
        # upsample to resize inputs of CIFAR10 from (32x32x3) to (256x256x3)
        layers.UpSampling2D(size=(2, 2)),
        layers.UpSampling2D(size=(2, 2)),
        layers.UpSampling2D(size=(2, 2)),
        conv_base,
        # classifier head on top of conv_base
        layers.Flatten(),
        layers.BatchNormalization(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.BatchNormalization(),
        embed_layer,
        layers.Dropout(0.5),
        layers.BatchNormalization(),
        layers.Dense(10, activation='softmax'),
    ]

    model = models.Sequential()
    for layer in stack:
        model.add(layer)

    loss_params = (sims, embed_layer, bsz)

    model.compile(optimizer='adam', loss=contrastive_loss(loss_params), metrics=['accuracy'], run_eagerly=True)

    return model

In [5]:
# utility to display training and validation curves
def plot_metrics(metric_name, title, hist=None):
    """Plot the training and validation curves for one metric.

    Args:
        metric_name: Key in the history dict, e.g. ``"loss"`` or
            ``"accuracy"``; the validation curve is read from
            ``"val_" + metric_name``.
        title: Title shown above the figure.
        hist: Object with a ``.history`` dict (such as the value returned by
            ``model.fit``). When omitted, the notebook-global ``history`` is
            used, as in the original two-argument calls.
    """
    if hist is None:
        # Backward-compatible fallback: read the most recent global run.
        hist = history

    curves = hist.history
    plt.plot(curves[metric_name], color='blue', label='training_' + metric_name)
    plt.plot(curves['val_' + metric_name], color='green', label='val_' + metric_name)

    # Fixed y-ranges keep the figures from different runs comparable.
    if metric_name == 'loss':
        plt.ylim([0, 200])
    elif metric_name == 'accuracy':
        plt.ylim([0, 1])

    plt.xlabel("epochs")
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
# plot metrics for the most recent training run
# No cell above this one assigns `history`, so on a fresh kernel the plots
# are skipped instead of raising NameError.
if "history" in globals():
    plot_metrics("loss", "Loss Curve")
    plot_metrics("accuracy", "Accuracy Curve")
else:
    print("No training history yet - run one of the training cells below first.")

# Use SimCLR Similarity Data

In [None]:
from simCLR_CIFAR import findSims

# The loss and fit() must agree on the batch size, so it is bound once here.
# NOTE(review): x_test has 10,000 rows, so the last batch of each epoch holds
# only 16 samples - confirm contrastive_loss tolerates a short batch.
BATCH_SIZE = 64
EPOCHS = 100

# load data (labels are integer class ids at this point)
x_train, x_valid, x_test, y_train, y_valid, y_test, labels = loadData()
NUM_CLASSES = len(labels)

# generate similarity matrix from the integer labels, before one-hot encoding
sims = findSims(x_test, y_test)

# build model
model = build_base_model(sims, BATCH_SIZE)

# expand labels to one-hot vectors
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_valid = tf.keras.utils.to_categorical(y_valid, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

# train model
# NOTE(review): the model is fit on the test split (the same split the
# similarity matrix was built from) and validated on x_valid - confirm this
# is intentional.
history = model.fit(
    x_test,
    y_test,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_valid, y_valid),
)

# plot metrics
plot_metrics("loss", "Loss Curve")
plot_metrics("accuracy", "Accuracy Curve")

# Use SupCon Similarity Data

In [None]:
from supCon_CIFAR import findSims

# The loss and fit() must agree on the batch size, so it is bound once here.
# NOTE(review): x_test has 10,000 rows, so the last batch of each epoch holds
# only 16 samples - confirm contrastive_loss tolerates a short batch.
BATCH_SIZE = 64
EPOCHS = 100

# load data (labels are integer class ids at this point)
x_train, x_valid, x_test, y_train, y_valid, y_test, labels = loadData()
NUM_CLASSES = len(labels)

# generate similarity matrix from the integer labels, before one-hot encoding
sims = findSims(x_test, y_test)

# build model
model = build_base_model(sims, BATCH_SIZE)

# expand labels to one-hot vectors
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_valid = tf.keras.utils.to_categorical(y_valid, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

# train model
# NOTE(review): the model is fit on the test split (the same split the
# similarity matrix was built from) and validated on x_valid - confirm this
# is intentional.
history = model.fit(
    x_test,
    y_test,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_valid, y_valid),
)

# plot metrics
plot_metrics("loss", "Loss Curve")
plot_metrics("accuracy", "Accuracy Curve")

# Use SoftCL Similarity Data

In [None]:
from softCLSims import findSims

# The loss and fit() must agree on the batch size, so it is bound once here.
# NOTE(review): x_test has 10,000 rows, so the last batch of each epoch holds
# only 16 samples - confirm contrastive_loss tolerates a short batch.
BATCH_SIZE = 64
EPOCHS = 100

# load data (labels are integer class ids at this point)
x_train, x_valid, x_test, y_train, y_valid, y_test, labels = loadData()
NUM_CLASSES = len(labels)

# generate similarity matrix from the precomputed pairwise similarities
soft_sims = np.load('/scratch/gpfs/eysu/src_data/cifar-10h/data/pairwise_sims.npy')
sims = findSims(x_test, y_test, soft_sims)

# build model
model = build_base_model(sims, BATCH_SIZE)

# expand labels to one-hot vectors
y_train = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_valid = tf.keras.utils.to_categorical(y_valid, NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

# train model
# NOTE(review): the model is fit on the test split, presumably because the
# cifar-10h similarities cover the CIFAR-10 test images - confirm. Validation
# uses x_valid.
history = model.fit(
    x_test,
    y_test,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_valid, y_valid),
)

# plot metrics
plot_metrics("loss", "Loss Curve")
plot_metrics("accuracy", "Accuracy Curve")