In [1]:
import numpy as np
import torch
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.activations import relu,linear
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
import logging
from time import time

# Normal MNIST Images: 300 training, 600 test

In [2]:
#Now let's import non-distilled images from the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
print(train_labels.shape)
print(test_labels.shape)


train_count = 300
test_count = 600
train_images = train_images[:train_count]
train_labels = train_labels[:train_count]
test_images = test_images[:test_count]
test_labels = test_labels[:test_count]


print("Training images shape:", train_images.shape)
print("Training labels shape:", train_labels.shape)
print("Testing images shape:", test_images.shape)
print("Testing labels shape:", test_labels.shape)

height, width = 28, 28
channels = 1

(60000,)
(10000,)
Training images shape: (300, 28, 28)
Training labels shape: (300,)
Testing images shape: (600, 28, 28)
Testing labels shape: (600,)


In [3]:
#Set global seed so that results don't vary across runs
tf.random.set_seed(1234)

#model trained on non-distilled MNIST images
model_normal = Sequential(
    [
    #conv layer 1 (relu)
    Conv2D(input_shape = (height, width, channels), filters=6, kernel_size=5, strides=1, padding='same', activation='relu', name='conv1'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling1'),
    #conv layer 2 (relu)
    Conv2D(filters=16, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv2'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling2'),
    #conv layer 3 (relu)
    Conv2D(filters=120, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv3'),
    #flatten
    Flatten(),
    #fully connected layer 1 (relu)
    Dense(84, activation='relu', name='dense1', kernel_regularizer=tf.keras.regularizers.l2(0.1)),
    #fully connected layer 2 (linear)
    Dense(10, activation='linear', name='dense2')
    ], name='LeNet-5'
)
# filters are the same as output channel

model_normal.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-2),
    metrics=['accuracy']
)

In [4]:
start = time()

tf.keras.backend.clear_session()
model_normal.fit(train_images, train_labels, epochs=300)

#Display duration
duration_norm = time() - start
print(f'Took {duration_norm:.5f} seconds')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [5]:
model_normal.summary()

Model: "LeNet-5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 28, 28, 6)         156       
                                                                 
 pooling1 (AveragePooling2D  (None, 14, 14, 6)         0         
 )                                                               
                                                                 
 conv2 (Conv2D)              (None, 10, 10, 16)        2416      
                                                                 
 pooling2 (AveragePooling2D  (None, 5, 5, 16)          0         
 )                                                               
                                                                 
 conv3 (Conv2D)              (None, 1, 1, 120)         48120     
                                                                 
 flatten (Flatten)           (None, 120)               0   

In [6]:
# Calculate the categorization error
#y: target value
#yhat: predicted value
#cerr: % incorrect

def eval_cat_err(y, yhat):
    m = len(y)
    incorrect = 0
    for i in range(m):
       if yhat[i] != y[i]:
            incorrect+=1
    cerr = incorrect / m
    
    return(cerr)

# 1 Image per Class (5 samples = 50 images total): 30 training, 10 cv, 10 test

In [7]:
input = torch.load('<--filepath-->')        #<------Use filepath to the 1ipc .pt file

#Check visualization of a single synthetic set without labels
# print(input['data'][0][0])

#Reshape tensor objects from batch_size, channels, height, width -> batch_size, height, width, channels using torch.permute() and numpy
#Create list of tensor objects without labels
#Framework conflict: Must convert PyTorch tensors into TensorFlow tensors by converting list into numpy array then converting to TF tensors
X=[]
for i in range(5):
    permuted = input['data'][i][0].permute(0, 2, 3, 1)
    numpy_X = permuted.numpy()
    tf_X = tf.convert_to_tensor(numpy_X, dtype = tf.float32)
    X.append(tf_X)

#Split image data into 60, 20, 20 for training, testing, and cross-validation sets
X_train = X[:3]
X_cv = X[3]
X_test = X[4]

#Extract input_shape of a single image: (height, width, channels)
#Batch size = 1ipc * 10 classes
batch_size, height, width, channels = X_train[0].shape
print('Batch_size: ', batch_size, ' Height: ', height, ' Width: ', width, ' Channels: ', channels)

#Create list of label tensor objects
#Convert PyTorch tensors to TensorFlow tensors
y=[]
for i in range(5):
    numpy_y = input['data'][i][1].numpy()
    tf_y = tf.convert_to_tensor(numpy_y, dtype = tf.float32)
    y.append(tf_y)

#Split labels into 60, 20, 20 for training, testing, and cross-validation sets
y_train = y[:3]
y_cv = y[3]
y_test = y[4]

Batch_size:  10  Height:  28  Width:  28  Channels:  1


In [8]:
logging.getLogger("tensorflow").setLevel(logging.ERROR)

#Set global seed so that results don't vary across runs
tf.random.set_seed(1234)

model_condensed = Sequential(
    [
    #conv layer 1 (relu)
    Conv2D(input_shape = (height, width, channels), filters=6, kernel_size=5, strides=1, padding='same', activation='relu', name='conv1'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling1'),
    #conv layer 2 (relu)
    Conv2D(filters=16, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv2'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling2'),
    #conv layer 3 (relu)
    Conv2D(filters=120, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv3'),
    #flatten
    Flatten(),
    #fully connected layer 1 (relu)
    Dense(84, activation='relu', name='dense1', kernel_regularizer=tf.keras.regularizers.l2(0.1)),
    #fully connected layer 2 (linear)
    Dense(10, activation='linear', name='dense2')
    ], name='LeNet-5'
)
# filters are the same as output channel

model_condensed.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-2),
    metrics=['accuracy']
)

In [9]:
#Combine input tensors since model expects 1 input tensor (simplifies process so we can treat input as single entity instead of multiple for each tensor)
#Initialize lists so that their values are reset every time the Jupyter notebook is run
X_train_combined=[]
y_train_combined=[]
X_temp=X_train[0]
y_temp=y_train[0]

#Essentially a running sum where you add the ith + 1 element in each iteration until reaching the end of the list
for i in range(len(X_train)):
    try:
        X_temp = np.concatenate((X_temp, X_train[i+1]), axis=0)
        y_temp = np.concatenate((y_temp, y_train[i+1]), axis=0)
    except:
        X_train_combined = X_temp
        y_train_combined = y_temp
        break

start = time()

tf.keras.backend.clear_session()
model_condensed.fit(X_train_combined, y_train_combined, epochs=300, validation_data=(X_cv, y_cv))

#Display duration
duration_1 = time() - start
print(f'Took {duration_1:.5f} seconds')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [10]:
#make a model for plotting routines to call
model_predict = lambda Xl: np.argmax(tf.nn.softmax(model_condensed.predict(Xl)).numpy(),axis=1)

training_cerr_1 = eval_cat_err(y_train_combined, model_predict(X_train_combined))
cv_cerr_1 = eval_cat_err(y_cv, model_predict(X_cv))
test_cerr_1 = eval_cat_err(y_test, model_predict(X_test))
print(f"Training Accuracy         (regularized, 1ipc): {1-training_cerr_1:0.7f}" )
print(f"Cross Validation Accuracy (regularized, 1ipc): {1-cv_cerr_1:0.7f}" )
print(f"Test Accuracy             (regularized, 1ipc): {1-test_cerr_1:0.7f}" )

Training Accuracy         (regularized, 1ipc): 1.0000000
Cross Validation Accuracy (regularized, 1ipc): 1.0000000
Test Accuracy             (regularized, 1ipc): 1.0000000


# 10 Images per Class (5 samples = 500 images total): 300 training, 100 cv, 100 test

In [11]:
input_10 = torch.load('<--filepath-->')      #<------Use filepath to the 10ipc .pt file

# print(input_10['data'][0])

#Create list of tensor objects without labels
X_10=[]
for i in range(5):
    permuted = input_10['data'][i][0].permute(0, 2, 3, 1)
    numpy_X = permuted.numpy()
    tf_X = tf.convert_to_tensor(numpy_X, dtype = tf.float32)
    X_10.append(tf_X)

#Split image data into 60, 20, 20 for training, testing, and cross-validation sets
X10_train = X_10[:3]
X10_cv = X_10[3]
X10_test = X_10[4]

#Batch size = 10ipc * 10 classes
batch_size_10, height_10, width_10, channels_10 = X10_train[0].shape
print('Batch_size: ', batch_size_10, ' Height: ', height_10, ' Width: ', width_10, ' Channels: ', channels_10)

#Create list of label tensor objects
y_10=[]
for i in range(5):
    numpy_y = input_10['data'][i][1].numpy()
    tf_y = tf.convert_to_tensor(numpy_y, dtype = tf.float32)
    y_10.append(tf_y)

#Split labels into 60, 20, 20 for training, testing, and cross-validation sets
y10_train = y_10[:3]
y10_cv = y_10[3]
y10_test = y_10[4]

Batch_size:  100  Height:  28  Width:  28  Channels:  1


In [12]:
#To prevent an excessive messages when running
logging.getLogger("tensorflow").setLevel(logging.ERROR)

#Set global seed so that results don't vary across runs
tf.random.set_seed(1234)

model_10_condensed = Sequential(
    [
    #conv layer 1 (relu)
    Conv2D(input_shape = (height_10, width_10, channels_10), filters=6, kernel_size=5, strides=1, padding='same', activation='relu', name='conv1'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling1'),
    #conv layer 2 (relu)
    Conv2D(filters=16, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv2'),
    #avg pooling
    AveragePooling2D(pool_size=2, strides=2, name='pooling2'),
    #conv layer 3 (relu)
    Conv2D(filters=120, kernel_size=5, strides=1, padding='valid', activation='relu', name='conv3'),
    #flatten
    Flatten(),
    #fully connected layer 1 (relu)
    Dense(84, activation='relu', name='dense1', kernel_regularizer=tf.keras.regularizers.l2(0.1)),
    #fully connected layer 2 (linear)
    Dense(10, activation='linear', name='dense2')
    ], name='LeNet-5'
)
# filters are the same as output channel

model_10_condensed.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-2),
    metrics=['accuracy']
)

In [19]:
#Initialize lists so that their values are reset every time the Jupyter notebook is run
X10_train_combined=[]
y10_train_combined=[]
X_temp=X10_train[0]
y_temp=y10_train[0]

#Essentially a running sum where you add the ith + 1 element in each iteration until reaching the end of the list
for i in range(len(X10_train)):
    try:
        X_temp = np.concatenate((X_temp, X10_train[i+1]), axis=0)
        y_temp = np.concatenate((y_temp, y10_train[i+1]), axis=0)
    except:
        X10_train_combined = X_temp
        y10_train_combined = y_temp
        break

start = time()

tf.keras.backend.clear_session()
model_10_condensed.fit(X10_train_combined, y10_train_combined, epochs=300, validation_data=(X10_cv, y10_cv))

# calculate and report duration of concatenation
duration_10 = time() - start
print(f'Took {duration_10:.5f} seconds')

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [20]:
#make a model for plotting routines to call
model_predict_10 = lambda Xl: np.argmax(tf.nn.softmax(model_10_condensed.predict(Xl)).numpy(),axis=1)

training_cerr_10 = eval_cat_err(y10_train_combined, model_predict_10(X10_train_combined))
cv_cerr_10 = eval_cat_err(y10_cv, model_predict_10(X10_cv))
test_cerr_10 = eval_cat_err(y10_test, model_predict_10(X10_test))
print(f"Training Accuracy         (regularized, 10ipc): {1-training_cerr_10:0.7f}" )
print(f"Cross Validation Accuracy (regularized, 10ipc): {1-cv_cerr_10:0.7f}" )
print(f"Test Accuracy             (regularized, 10ipc): {1-test_cerr_10:0.7f}" )

Training Accuracy         (regularized, 10ipc): 0.9966667
Cross Validation Accuracy (regularized, 10ipc): 0.9300000
Test Accuracy             (regularized, 10ipc): 0.9300000


# Comparing Training Accuracies

In [21]:
#Model trained on normal MNIST data
#Tested on normal MNIST data

#make a model for plotting routines to call
model_predict = lambda Xl: np.argmax(tf.nn.softmax(model_normal.predict(Xl)).numpy(),axis=1)

training_cerr_norm = eval_cat_err(train_labels, model_predict(train_images))
test_cerr_norm = eval_cat_err(test_labels, model_predict(test_images))



In [22]:
#Model trained on 1ipc distilled MNIST data
#Tested on normal MNIST data

#make a model for plotting routines to call
model_predict = lambda Xl: np.argmax(tf.nn.softmax(model_condensed.predict(Xl)).numpy(),axis=1)

training_cerr_1 = eval_cat_err(y_train_combined, model_predict(X_train_combined))
test_cerr_1 = eval_cat_err(test_labels, model_predict(test_images))



In [23]:
#Model trained on 10ipc distilled MNIST data
#Tested on normal MNIST data

#make a model for plotting routines to call
model_predict_10 = lambda Xl: np.argmax(tf.nn.softmax(model_10_condensed.predict(Xl)).numpy(),axis=1)

training_cerr_10 = eval_cat_err(y10_train_combined, model_predict_10(X10_train_combined))
test_cerr_10 = eval_cat_err(test_labels, model_predict_10(test_images))



In [24]:
print(f"Training Accuracy     (regularized, non-distilled, 300 images): {1-training_cerr_norm:0.7f}" )
print(f"Test Accuracy         (regularized, non-distilled, 600 images): {1-test_cerr_norm:0.7f}" )
print(f"Time to Train                     (regularized, non-distilled): {duration_norm:0.5f}" )
print(f"Training Accuracy    (regularized, distilled: 1ipc, 30 images): {1-training_cerr_1:0.7f}" )
print(f"Test Accuracy        (regularized, distilled: 1ipc, 10 images): {1-test_cerr_1:0.7f}" )
print(f"Training Accuracy  (regularized, distilled: 10ipc, 300 images): {1-training_cerr_10:0.7f}" )
print(f"Test Accuracy      (regularized, distilled: 10ipc, 600 images): {1-test_cerr_10:0.7f}" )
print(f"Time to Train      (regularized, distilled: 10ipc, 300 images): {duration_10:0.5f}" )

Training Accuracy     (regularized, non-distilled, 300 images): 0.9500000
Test Accuracy         (regularized, non-distilled, 600 images): 0.7216667
Time to Train                     (regularized, non-distilled): 14.38021
Training Accuracy    (regularized, distilled: 1ipc, 30 images): 1.0000000
Test Accuracy        (regularized, distilled: 1ipc, 10 images): 0.7283333
Training Accuracy  (regularized, distilled: 10ipc, 300 images): 0.9966667
Test Accuracy      (regularized, distilled: 10ipc, 600 images): 0.9250000
Time to Train      (regularized, distilled: 10ipc, 300 images): 16.44799
