In [None]:
#As can be seen from runs prior to run_2021_12_29-00_26_20 ForcedLearning20Cifar100Adam, the categorical
#accuracy progresses much more slowly than the vanilla variant of the model. I hypothesize that this is
#because the model has to play "Catch-up" with itself, because each block is learning independently. This
#causes each successive block in the model to spend the next batch adjusting to what the previous batch just
#learned, and not gaining any intelligence. I have rectified this by allowing each training step to have the
#model pass through one layer, update that specific model_block, pass through another, update that block wrt
#the first and second block, and so on. This has increased the time, but vastly increased the speed at which
#the model learns, even surpassing the vanilla model at the beginning of the training.

#This new forced learning seems to cap out at about 68 percent categorical accuracy; perhaps it is unable to
#learn lower-level features? Perhaps it would fare well with a larger model and a different dataset. I think
#I should try creating different optimizers with different learning rates for the model.

In [5]:
import tensorflow as tf
import numpy as np

# Mini-batch size used by every tf.data pipeline in this notebook.
batch_size = 128

# CIFAR-100 ships as a train split and a test split; the test split is cut
# into two halves, the first kept as the test set and the second as validation.
train_split, holdout_split = tf.keras.datasets.cifar100.load_data()
x_train, y_train = train_split
x_holdout, y_holdout = holdout_split

x_test, x_val = np.array_split(x_holdout, 2)
y_test, y_val = np.array_split(y_holdout, 2)

# Sanity-check the shape of every split before going any further.
for split_x, split_y, split_len in (
    (x_train, y_train, 50000),
    (x_val, y_val, 5000),
    (x_test, y_test, 5000),
):
    assert split_x.shape == (split_len, 32, 32, 3)
    assert split_y.shape == (split_len, 1)

import os

# All TensorBoard runs are written below ./my_logs (relative to the working directory).
root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir(model_name):
    """Return a log directory path for a new run of ``model_name``.

    The directory name is the current local time formatted as
    ``run_YYYY_MM_DD-HH_MM_SS``, a single space, and then ``model_name``;
    it is placed under ``root_logdir``. The directory itself is not created.
    """
    import time

    timestamp = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    run_id = " ".join((timestamp, model_name))
    return os.path.join(root_logdir, run_id)

In [6]:
# Turn the (N, 1) integer label arrays into 100-way one-hot tensors.
y_train_onehot, y_val_onehot, y_test_onehot = (
    tf.one_hot(np.squeeze(split_labels), 100)
    for split_labels in (y_train, y_val, y_test)
)

# Convert the images to float32 and divide by 255.
x_train, x_val, x_test = (
    split_pixels.astype("float32") / 255
    for split_pixels in (x_train, x_val, x_test)
)


In [7]:
# Wrap each split in a tf.data pipeline.
# The validation pipeline is built from the validation arrays and the test
# pipeline from the test arrays (these two were previously crossed over).
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train_onehot))
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val_onehot))
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test_onehot))

# Shuffle with a 1024-element buffer, batch, and prefetch one batch ahead.
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)
val_dataset = val_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)
test_dataset = test_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)

In [36]:
# Peek at one training batch to read off the per-image shape,
# then build a Keras Input with it to confirm the expected tensor spec.
for batch_images, _batch_labels in train_dataset.take(1):
    first_image = batch_images[0]
    input_shape = first_image.numpy().shape

tf.keras.Input(shape=input_shape)

<KerasTensor: shape=(None, 32, 32, 3) dtype=float32 (created by layer 'input_3')>

In [None]:
# Three stacked stages of a plain 3x3 convolutional network.
# Every convolution uses "same" padding and ReLU.

# Stage 1: seven 16-filter convolutions on the 32x32x3 input.
model_16 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 3)),
        *[
            tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")
            for _ in range(7)
        ],
    ],
    name="block_1",
)

# Stage 2: one stride-2 convolution followed by six more, all with 32 filters.
model_32 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 16)),
        tf.keras.layers.Conv2D(32, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")
            for _ in range(6)
        ],
    ],
    name="block_2",
)

# Stage 3: one stride-2 convolution followed by six more (64 filters), then
# an 8x8 average pool, flatten, and a 100-way softmax classifier.
model_64 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(16, 16, 32)),
        tf.keras.layers.Conv2D(64, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")
            for _ in range(6)
        ],
        tf.keras.layers.AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ],
    name="block_3",
)

# Print the layer table of each stage.
for stage_model in (model_16, model_32, model_64):
    stage_model.summary()

In [None]:
# Three bottleneck-style stages. Each stage repeats a (1x1, 3x3, 1x1)
# convolution triple; every convolution uses "same" padding and ReLU.

# Stage 1: three (64, 64, 256) triples on the 32x32x3 input.
block_1 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 3)),
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(3)
            for width, kernel in ((64, 1), (64, 3), (256, 1))
        ],
    ],
    name="block_1",
)

# Stage 2: stride-2 max pool, then four (128, 128, 512) triples.
block_2 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 256)),
        tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(4)
            for width, kernel in ((128, 1), (128, 3), (512, 1))
        ],
    ],
    name="block_2",
)

# Stage 3: stride-2 max pool, three (512, 512, 2048) triples, then the
# classifier head (7x7 average pool, flatten, 100-way softmax).
# NOTE(review): with a 16x16 input the max pool yields 8x8 maps, so the 7x7
# "valid" average pool leaves a 2x2 map rather than 1x1 — confirm intended.
block_3 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(16, 16, 512)),
        tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(3)
            for width, kernel in ((512, 1), (512, 3), (2048, 1))
        ],
        tf.keras.layers.AveragePooling2D(pool_size=(7, 7), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ],
    name="block_3",
)

# Print the layer table of each stage.
for stage_block in (block_1, block_2, block_3):
    stage_block.summary()

In [None]:
# Single end-to-end network with the same layer sequence as the three
# 16/32/64-filter stages, trained as one ordinary Sequential model.
model_20_vanilla = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 3)),
        # 16-filter stage: seven 3x3 convolutions.
        *[
            tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")
            for _ in range(7)
        ],
        # 32-filter stage: stride-2 convolution, then six more.
        tf.keras.layers.Conv2D(32, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")
            for _ in range(6)
        ],
        # 64-filter stage: stride-2 convolution, then six more.
        tf.keras.layers.Conv2D(64, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")
            for _ in range(6)
        ],
        # Classifier head.
        tf.keras.layers.AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ]
)

In [55]:
# Miniature baseline model, used as a comparison point for the
# ForcedLearner Small variants: one convolution plus a classifier head.
mini_layers = [
    tf.keras.Input(shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu"),
    # Pool over the whole 32x32 map, then classify into 100 classes.
    tf.keras.layers.AveragePooling2D(pool_size=(32, 32), strides=(1, 1), padding="valid"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(100, activation="softmax"),
]
mini_model = tf.keras.Sequential(mini_layers, name="mini_model")

mini_model.summary()

Model: "mini_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_39 (Conv2D)          (None, 32, 32, 16)        448       
                                                                 
 average_pooling2d_15 (Avera  (None, 1, 1, 16)         0         
 gePooling2D)                                                    
                                                                 
 flatten_15 (Flatten)        (None, 16)                0         
                                                                 
 dense_15 (Dense)            (None, 100)               1700      
                                                                 
Total params: 2,148
Trainable params: 2,148
Non-trainable params: 0
_________________________________________________________________


In [1]:
# Boilerplate model architectures for the ForcedLearner Small variants.

# First block: four 16-filter convolutions, a stride-2 32-filter convolution,
# then three more 32-filter convolutions (all 3x3, "same" padding, ReLU).
small_1 = tf.keras.Sequential(
    [
        *[
            tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")
            for _ in range(4)
        ],
        tf.keras.layers.Conv2D(32, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")
            for _ in range(3)
        ],
    ],
    name="small_1",
)

# Auxiliary classifier head placed on top of the first block.
auxillary = tf.keras.Sequential(
    [
        tf.keras.layers.AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ]
)

# Functional model chaining the first block and its head, so the first
# block can be trained on its own.
inputs = tf.keras.Input(shape=(32, 32, 3))
small_model_pretrain = tf.keras.Model(
    inputs=inputs,
    outputs=auxillary(small_1(inputs)),
    name="small_model_pretrain",
)

# Second block: a stride-2 64-filter convolution, three more 64-filter
# convolutions, and its own classifier head.
small_2 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(16, 16, 32)),
        tf.keras.layers.Conv2D(64, 3, strides=(2, 2), padding="same", activation="relu"),
        *[
            tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")
            for _ in range(3)
        ],
        tf.keras.layers.AveragePooling2D(pool_size=(8, 8), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ],
    name="small_2",
)

# Print the layer tables of the small blocks and of the mini baseline.
# (A stray copy of the ForcedNetSmall training cell had been pasted into the
# middle of the `small_2.summary()` call here, which made this cell a syntax
# error; the class itself lives in its own "Training algorithm" cell.)
small_1.summary()
small_2.summary()
mini_model.summary()

NameError: name 'tf' is not defined

In [93]:
# Flat list of the twelve convolution layers of the small network, one entry
# per layer (all 3x3, "same" padding, ReLU).
model_list = [
    *[
        tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")
        for _ in range(4)
    ],
    tf.keras.layers.Conv2D(32, 3, strides=(2, 2), padding="same", activation="relu"),
    *[
        tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")
        for _ in range(3)
    ],
    tf.keras.layers.Conv2D(64, 3, strides=(2, 2), padding="same", activation="relu"),
    *[
        tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")
        for _ in range(3)
    ],
]

# The same layers grouped into the two "small" blocks (no classifier heads).
# This list used to be bound to the name `mini_model`, which overwrote the
# Sequential `mini_model` that the baseline training cell compiles and fits;
# it now has its own name so both can coexist in a top-to-bottom run.
mini_model_blocks = [
    tf.keras.Sequential(
        [
            *[
                tf.keras.layers.Conv2D(16, 3, padding="same", activation="relu")
                for _ in range(4)
            ],
            tf.keras.layers.Conv2D(32, 3, strides=(2, 2), padding="same", activation="relu"),
            *[
                tf.keras.layers.Conv2D(32, 3, padding="same", activation="relu")
                for _ in range(3)
            ],
        ],
        name="small_1",
    ),
    tf.keras.Sequential(
        [
            tf.keras.Input(shape=(16, 16, 32)),
            tf.keras.layers.Conv2D(64, 3, strides=(2, 2), padding="same", activation="relu"),
            *[
                tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")
                for _ in range(3)
            ],
        ],
        name="small_2",
    ),
]

type(mini_model_blocks)

list

In [94]:
# Experimental (unfinished) cell: attempts to train the layers of `model_list`
# one at a time. It does not run as written — see the NOTE(review) comments.

# Classifier head: 8x8 average pool, flatten, 100-way softmax.
# NOTE(review): this rebinds the module-level name `auxillary` to a new,
# untrained head; later cells that pass `auxillary` will pick up this one.
auxillary = tf.keras.Sequential(
    [
        tf.keras.layers.AveragePooling2D(pool_size=(8,8), strides=(1, 1), padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax")
    ]
)
# Learning-rate schedule keyed on the "Block_2_Loss" value reported during training.
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='Block_2_Loss', patience=7)]

for i in range(12):
    x = tf.keras.Input(shape=(32,32,3))
    # NOTE(review): `model_list` is a plain Python list, so `model_list(x)` is
    # not callable. For i == 0 the comprehension is empty and `x` becomes [],
    # which matches the recorded "received 0 input tensors" ValueError below.
    x = [model_list(x) for _ in range(0, i)]
    outputs = auxillary(x)
    
    # NOTE(review): incomplete assignment — this line is a syntax error.
    model = 
    
    # NOTE(review): `model_pretrain` and `layer` are not defined anywhere before
    # the first iteration, and ForcedNetSmall.__init__ takes three arguments
    # (block_1, block_2, auxillary), not two.
    model = ForcedNetSmall(model_pretrain, layer)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
        loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    )
    
    # `callback` is already a list, so this passes a list nested in a list.
    history = model.fit(train_dataset, epochs=16, callbacks=[callback])
    # NOTE(review): ForcedNetSmall defines no `pop`; presumably this was meant
    # to strip the classifier head before the next iteration — confirm.
    model.pop()
    model_pretrain = model

Consider rewriting this model with the Functional API.


ValueError: Exception encountered when calling layer "sequential_29" (type Sequential).

Layer "average_pooling2d_28" expects 1 input(s), but it received 0 input tensors. Inputs received: []

Call arguments received:
  • inputs=[]
  • training=None
  • mask=None

In [7]:
#Training algorithm and model setup for ForcedLearner Small variants

# Module-level metric trackers, reported by ForcedNetSmall through its
# `metrics` property (so Keras resets them at the start of each epoch).
mae_metric_1 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_1 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")

mae_metric_2 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_2 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")


class ForcedNetSmall(tf.keras.Model):
    """Two-block network in which only the second block is trained.

    Args:
        block_1: Model mapping images to intermediate features.
        block_2: Model mapping block-1 features onward. If ``auxillary_2`` is
            not given, it must already end in its own classifier.
        auxillary: Classifier head applied to the block-1 features; used to
            report block-1 loss and metrics.
        auxillary_2: Optional classifier head applied to the block-2 output.
            Defaults to ``None``, meaning ``block_2`` produces the predictions
            itself.
    """

    def __init__(self, block_1, block_2, auxillary, auxillary_2=None):
        super(ForcedNetSmall, self).__init__()

        self.auxillary_1 = auxillary
        self.auxillary_2 = auxillary_2

        self.model_1 = block_1
        self.model_2 = block_2

    def compile(self, optimizer, loss_fn):
        """Store the optimizer and the loss function used by ``train_step``."""
        super(ForcedNetSmall, self).compile()
        self.optimizer = optimizer
        self.loss_fn = loss_fn

    @property
    def metrics(self):
        """Metric objects tracked (and reset each epoch) for both blocks."""
        return [mae_metric_1, accuracy_metric_1,
                mae_metric_2, accuracy_metric_2]

    def _block_2_predictions(self, features):
        """Map block-1 features to class predictions through block 2."""
        outputs = self.model_2(features)
        if self.auxillary_2 is not None:
            outputs = self.auxillary_2(outputs)
        return outputs

    def call(self, images):
        """Full forward pass: block 1, then block 2 (plus its head, if any)."""
        return self._block_2_predictions(self.model_1(images))

    def summary(self):
        """Print the layer tables of both blocks and of the auxiliary head(s)."""
        self.model_1.summary()
        self.model_2.summary()
        print("\nAuxillary Layers:")
        self.auxillary_1.summary()
        if self.auxillary_2 is not None:
            self.auxillary_2.summary()

    def train_step(self, data):
        """Evaluate block 1, then take one optimizer step on block 2.

        Returns:
            Dict with the per-block loss, MAE and categorical accuracy.
        """
        images = data[0]
        labels = data[1]

        # Block 1 is only evaluated: no gradients are applied to it here.
        # NOTE(review): this assumes block 1 was trained beforehand (e.g. via
        # small_model_pretrain) — confirm.
        features = self.model_1(images)
        predictions_1 = self.auxillary_1(features)
        loss_1 = self.loss_fn(labels, predictions_1)

        # Block 2 consumes the block-1 features (not the raw images).
        with tf.GradientTape() as tape:
            predictions_2 = self._block_2_predictions(features)
            loss_2 = self.loss_fn(labels, predictions_2)

        # Collect weights after the forward pass so lazily-built layers exist.
        weights = list(self.model_2.trainable_weights)
        if self.auxillary_2 is not None:
            weights += list(self.auxillary_2.trainable_weights)

        grads = tape.gradient(loss_2, weights)
        self.optimizer.apply_gradients(zip(grads, weights))

        mae_metric_1.update_state(labels, predictions_1)
        accuracy_metric_1.update_state(labels, predictions_1)

        mae_metric_2.update_state(labels, predictions_2)
        accuracy_metric_2.update_state(labels, predictions_2)

        return {"Block_1_Loss": loss_1,
                "Block_2_Loss": loss_2,

                "Block_1_MAE": mae_metric_1.result(),
                "Block_2_MAE": mae_metric_2.result(),

                "Block_1_Accuracy": accuracy_metric_1.result(),
                "Block_2_Accuracy": accuracy_metric_2.result(), }

In [8]:
# Compile and train the standard "Mini model", the baseline that the
# ForcedLearner small variant is compared against.

run_logdir = get_run_logdir("MiniModelVanilla_Cifar100")

# Reduce the learning rate when the training loss plateaus, and log to TensorBoard.
callback = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
    tf.keras.callbacks.TensorBoard(run_logdir),
]

metrics = [
    tf.keras.metrics.MeanAbsoluteError(name="mae"),
    tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy"),
]

mini_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    metrics=metrics,
)

history = mini_model.fit(train_dataset, epochs=200, callbacks=[callback])

Epoch 1/200


2022-01-01 21:08:25.174892: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200
2022-01-01 21:08:28.200164: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
 55/391 [===>..........................] - ETA: 1s - loss: 2.7718 - mae: 0.0166 - categorical_accuracy: 0.2991

KeyboardInterrupt: 

In [None]:
#ForcedLearner Small compilation and training

metrics = [tf.keras.metrics.MeanAbsoluteError(name="mae"),
            tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")]

# --- Step 1: train the first block together with its auxiliary head. ---
run_logdir = get_run_logdir("SmallForced_pretrain_Cifar100")

# The run directory is now actually handed to a TensorBoard callback
# (it used to be computed and then dropped, so nothing was logged).
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
            tf.keras.callbacks.TensorBoard(run_logdir)]
small_model_pretrain.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003), metrics=metrics
)

history = small_model_pretrain.fit(train_dataset, epochs=125, callbacks=callback)

# --- Step 2: train the forced learner built from both small blocks. ---
run_logdir = get_run_logdir("SmallForcedLearner_Cifar100")

ForcedSmall = ForcedNetSmall(small_1, small_2, auxillary)

# The plateau schedule follows the second block's loss as reported by train_step.
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='Block_2_Loss', patience=7),
            tf.keras.callbacks.TensorBoard(run_logdir)]
ForcedSmall.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
    loss_fn=tf.keras.losses.CategoricalCrossentropy(),
)

history = ForcedSmall.fit(train_dataset, epochs=75, callbacks=callback)

In [None]:
#Deprecated training for OG ForcedLearning

# Module-level metric trackers, one MAE/accuracy pair per stage; ForcedNet
# reports them through its `metrics` property.
mae_metric_16 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_16 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")

mae_metric_32 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_32 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")

mae_metric_64 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_64 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")


class ForcedNet(tf.keras.Model):
    """Three-block network trained stage by stage within each training step.

    Each step updates block 1 through its own auxiliary classifier, then
    blocks 1+2 through a second auxiliary classifier, then the full
    three-block network through the classifier at the end of block 3.

    Args:
        block_1: Model taking 32x32x3 images.
        block_2: Model taking the output of ``block_1``.
        block_3: Model taking the output of ``block_2`` and producing the
            final class predictions.
    """

    def __init__(self, block_1, block_2, block_3):
        super(ForcedNet, self).__init__()

        inputs = tf.keras.Input(shape=(32,32,3))

        # Auxiliary classifier for the block-1 output.
        self.auxillary_1 = tf.keras.Sequential(
            [
                tf.keras.layers.AveragePooling2D(pool_size=(32,32), strides=(1, 1), padding='valid'),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(100, activation="softmax")
            ]
        )

        # Auxiliary classifier for the block-2 output.
        self.auxillary_2 = tf.keras.Sequential(
            [
                tf.keras.layers.AveragePooling2D(pool_size=(16,16), strides=(1, 1), padding='valid'),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(100, activation="softmax")
            ]
        )

        self.model_1 = block_1

        # model_2 = block_1 followed by block_2, from the raw images.
        x = block_1(inputs)
        output = block_2(x)

        self.model_2 = tf.keras.Model(inputs=inputs, outputs=output, name="section_2")

        # model_3 = model_2 followed by block_3, i.e. the complete network.
        x = self.model_2(inputs)
        output = block_3(x)

        self.model_3 = tf.keras.Model(inputs=inputs, outputs = output, name="section_3")

    def compile(self, optimizer, loss_fn):
        """Store the optimizer and the loss function used by ``train_step``."""
        super(ForcedNet, self).compile()
        self.optimizer = optimizer
        self.loss_fn = loss_fn

    @property
    def metrics(self):
        """Metric objects tracked (and reset each epoch) for the three stages."""
        return [mae_metric_16, accuracy_metric_16,
                mae_metric_32, accuracy_metric_32, mae_metric_64, accuracy_metric_64]

    def call(self, images):
        """Forward pass through the complete three-block network."""
        # model_3 already chains all three blocks; the model holds no
        # `block_1`/`block_2`/`block_3` attributes.
        return self.model_3(images)

    def summary(self):
        """Print the layer tables of every stage and of both auxiliary heads."""
        self.model_1.summary()
        self.model_2.summary()
        self.model_3.summary()
        print("\nAuxillary Layers:")
        self.auxillary_1.summary()
        self.auxillary_2.summary()

    def _train_stage(self, images, labels, body, head=None):
        """Forward/backward pass for one stage followed by a single update.

        ``body`` (and ``head``, when given) are run on ``images``; their
        trainable weights are updated together in one ``apply_gradients``
        call. Returns ``(loss, predictions)``.
        """
        with tf.GradientTape() as tape:
            predictions = body(images)
            if head is not None:
                predictions = head(predictions)
            loss = self.loss_fn(labels, predictions)

        # Collect weights after the forward pass so lazily-built heads exist.
        weights = list(body.trainable_weights)
        if head is not None:
            weights += list(head.trainable_weights)

        grads = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))
        return loss, predictions

    def train_step(self, data):
        """Train the three stages in order on one batch.

        Returns:
            Dict with the per-stage loss, MAE and categorical accuracy.
        """
        images = data[0]
        labels = data[1]

        #First model part
        loss_1, predictions_1 = self._train_stage(images, labels, self.model_1, self.auxillary_1)

        #Second model part
        loss_2, predictions_2 = self._train_stage(images, labels, self.model_2, self.auxillary_2)

        #Third model part
        loss_3, predictions_3 = self._train_stage(images, labels, self.model_3)

        mae_metric_16.update_state(labels, predictions_1)
        accuracy_metric_16.update_state(labels, predictions_1)

        mae_metric_32.update_state(labels, predictions_2)
        accuracy_metric_32.update_state(labels, predictions_2)

        mae_metric_64.update_state(labels, predictions_3)
        accuracy_metric_64.update_state(labels, predictions_3)

        return {"Block_1_Loss": loss_1, "Block_2_Loss": loss_2, "Block_3_Loss": loss_3,

                "Block_1_MAE": mae_metric_16.result(), "Block_2_MAE": mae_metric_32.result(),
                "Block_3_MAE": mae_metric_64.result(),

                "Block_1_Accuracy": accuracy_metric_16.result(), "Block_2_Accuracy": accuracy_metric_32.result(),
                "Block_3_Accuracy": accuracy_metric_64.result()}

In [None]:
# Train the three-block forced learner, logging to TensorBoard and lowering
# the learning rate when the third block's loss plateaus.
run_logdir = get_run_logdir("ForcedLearning20Cifar100Adam")
callback = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='Block_3_Loss', patience=7),
    tf.keras.callbacks.TensorBoard(run_logdir),
]

ForcedLearner = ForcedNet(block_1, block_2, block_3)
ForcedLearner.compile(
    loss_fn=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
)

history = ForcedLearner.fit(train_dataset, epochs=50, callbacks=[callback])

In [None]:
# Evaluate the trained forced learner on the validation set.
# This used to call `ForcedLearner.fit(val_dataset, ...)`, which runs
# train_step and therefore *trains* on the validation data. The loop below
# only runs forward passes through the full network (`model_3`).
val_loss = tf.keras.metrics.Mean(name="val_loss")
val_accuracy = tf.keras.metrics.CategoricalAccuracy(name="val_categorical_accuracy")

for val_images, val_labels in val_dataset:
    val_predictions = ForcedLearner.model_3(val_images, training=False)
    # Weight each batch loss by its size so a short final batch is not over-counted.
    val_loss.update_state(
        ForcedLearner.loss_fn(val_labels, val_predictions),
        sample_weight=tf.cast(tf.shape(val_images)[0], tf.float32),
    )
    val_accuracy.update_state(val_labels, val_predictions)

history = {
    "val_loss": float(val_loss.result()),
    "val_categorical_accuracy": float(val_accuracy.result()),
}
history

In [None]:
# Show one validation image next to its predicted and true class indices.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# `model` is never bound by any working cell above, so the network is named
# explicitly here.
# NOTE(review): assumes the trained ForcedLearner (its full three-block
# network) is the model meant to be inspected — confirm.
predictor = ForcedLearner.model_3

# Take a single batch of (images, one-hot labels) from the validation set.
for line in val_dataset.take(1):
    images = line[0]
    predictions = predictor.predict(images)
    truth = line[1]

# Class index = position of the largest entry in each row.
labels = np.argmax(predictions, axis=1)
truth_labels = np.argmax(truth.numpy(), axis=1)

# Index (within the batch) of the example to display.
num = 13

print(labels[num])
print(truth_labels[num])

# Display the selected image.
img = images[num].numpy()
imgplot = plt.imshow(img)
plt.show()

In [None]:
#inputs = tf.keras.Input(shape=(32,32,3))
#x = model_16(inputs)
#x = model_32(x)
import os

# End-to-end ("vanilla") version of the large bottleneck network: the same
# layer sequence as the three bottleneck stages, in one Sequential model.
# Every convolution uses "same" padding and ReLU.
vanilla_model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32, 32, 3)),
        # Stage 1: three (64, 64, 256) bottleneck triples.
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(3)
            for width, kernel in ((64, 1), (64, 3), (256, 1))
        ],
        # Stage 2: downsample, then four (128, 128, 512) triples.
        tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(4)
            for width, kernel in ((128, 1), (128, 3), (512, 1))
        ],
        # Stage 3: downsample, then three (512, 512, 2048) triples.
        tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same"),
        *[
            tf.keras.layers.Conv2D(width, kernel, padding="same", activation="relu")
            for _ in range(3)
            for width, kernel in ((512, 1), (512, 3), (2048, 1))
        ],
        # Classifier head.
        tf.keras.layers.AveragePooling2D(pool_size=(7, 7), strides=(1, 1), padding="valid"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ],
    name="vanilla_model",
)

run_logdir = get_run_logdir("VanillaModel_Large_Cifar100")
callback = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
    tf.keras.callbacks.TensorBoard(run_logdir),
]

metrics = [
    tf.keras.metrics.MeanAbsoluteError(name="mae"),
    tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy"),
]

vanilla_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    metrics=metrics,
)

vanilla_model.summary()
#history = vanilla_model.fit(train_dataset, epochs=50, callbacks=[callback])

In [None]:
# Reference run: an untrained (weights=None) DenseNet201 fitted on the same
# training data, with the same callbacks as the other baselines.
run_logdir = get_run_logdir("DenseNet201_Cifar100")
callback = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
    tf.keras.callbacks.TensorBoard(run_logdir),
]

model_temp = tf.keras.applications.DenseNet201(
    weights=None,
    input_shape=(32, 32, 3),
    classes=100,
)
# `metrics` is the metric list created by an earlier cell.
model_temp.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    metrics=[metrics],
)
#model_temp.summary()
model_temp.fit(train_dataset, epochs=100, callbacks=[callback])

In [None]:
# Print the layer table of the DenseNet201 reference model.
model_temp.summary()

In [1]:
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_07_20 SmallModelVanilla_Cifar100'

In [2]:
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_20_54 SmallForced_pretrain_Cifar100'

In [3]:
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_25_03 SmallForcedLearner_Cifar100'