In [None]:
# As can be seen from the runs prior to run_2021_12_29-00_26_20 ForcedLearning20Cifar100Adam, the categorical
# accuracy progresses much more slowly than in the vanilla variant of the model. I hypothesize that this is
# because the model has to play "catch-up" with itself, since each block is learning independently. This
# causes each successive block in the model to spend the next batch adjusting to what the preceding block just
# learned on the previous batch, instead of gaining any intelligence. I have rectified this by having each
# training step pass the input through one layer, update that specific model_block, pass through another,
# update that block wrt the first and second block, and so on. This has increased the training time, but vastly
# increased the speed at which the model learns, even surpassing the vanilla model at the beginning of training.

# This new forced learning seems to cap out at about 68 percent categorical accuracy; perhaps it is unable to
# learn lower-level features? Perhaps it would fare well with a larger model and a different dataset. I think I
# should try creating different optimizers with different learning rates for the model.

In [None]:
import tensorflow as tf
import numpy as np
import os

# Mini-batch size used by every tf.data pipeline built later in the notebook.
batch_size = 128

# CIFAR-100 as shipped by Keras: a training split and a test split of uint8 images
# with integer labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()

# Cut the official test split in two equal parts: the first half remains the test
# set, the second half becomes the validation set. The right-hand side is evaluated
# in full before any name is rebound, so both splits see the original arrays.
(x_test, x_val), (y_test, y_val) = np.array_split(x_test, 2), np.array_split(y_test, 2)

# Sanity-check every split: images are (N, 32, 32, 3) and labels are (N, 1).
assert x_train.shape == (50000, 32, 32, 3) and y_train.shape == (50000, 1)
assert x_val.shape == (5000, 32, 32, 3) and y_val.shape == (5000, 1)
assert x_test.shape == (5000, 32, 32, 3) and y_test.shape == (5000, 1)

# Every TensorBoard run directory is created beneath ./my_logs.
root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir(model_name):
    """Return a fresh, timestamped log directory path for one training run.

    Args:
        model_name: Label appended (after a single space) to the timestamp so the
            run can be identified in TensorBoard.

    Returns:
        ``<root_logdir>/run_YYYY_MM_DD-HH_MM_SS <model_name>`` built from the
        current local time.
    """
    import time

    timestamp = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    run_id = " ".join((timestamp, model_name))
    return os.path.join(root_logdir, run_id)

In [None]:
# One-hot encode the integer class ids (100 classes) so the labels match the
# 100-way softmax outputs and the categorical cross-entropy / accuracy used below.
y_train_onehot = tf.one_hot(np.squeeze(y_train), 100)
y_val_onehot = tf.one_hot(np.squeeze(y_val), 100)
y_test_onehot = tf.one_hot(np.squeeze(y_test), 100)

# Rescale the images to float32 by dividing by 255.
# NOTE(review): these names are rebound in place, so re-running this cell without
# re-running the loading cell divides by 255 a second time.
x_train = x_train.astype("float32")/255
x_val = x_val.astype("float32")/255
x_test = x_test.astype("float32")/255

# Pair each image split with its *own* labels. The validation and test pipelines
# were previously built from each other's arrays, so `val_dataset` actually held
# the test split and vice versa.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train_onehot))
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val_onehot))
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test_onehot))

# Shuffle with a 1024-element buffer, batch, and prefetch one batch ahead.
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)
val_dataset = val_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)
test_dataset = test_dataset.shuffle(buffer_size=1024).batch(batch_size).prefetch(1)

In [None]:
def _conv_relu(filters, downsample=False):
    """Build one 3x3, same-padded, ReLU-activated convolution.

    With ``downsample=True`` the layer uses a stride of (2, 2); otherwise the
    Keras default stride is used.
    """
    if downsample:
        return tf.keras.layers.Conv2D(filters, 3, strides=(2,2), padding="same", activation='relu')
    return tf.keras.layers.Conv2D(filters, 3, padding="same", activation='relu')


def _conv_stage(filters, downsample=False):
    """Build four consecutive convolutions of equal width.

    Only the first layer of the stage is strided when ``downsample`` is set.
    Layers are created in the order they are returned.
    """
    return [_conv_relu(filters, downsample=(downsample and position == 0))
            for position in range(4)]


def _classifier_head():
    """Build the pooling -> flatten -> 100-way softmax layers used as a head."""
    return [
        tf.keras.layers.AveragePooling2D(pool_size=(8,8), strides=(1, 1), padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(100, activation="softmax"),
    ]


# Vanilla baseline: all three convolutional stages plus the head, trained end to end.
mini_model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32,32,3)),
        *_conv_stage(16),
        *_conv_stage(32, downsample=True),
        *_conv_stage(64, downsample=True),
        *_classifier_head(),
    ]
)

# First block of the split model: the 16- and 32-filter stages, without a head.
small_1 = tf.keras.Sequential(
    [
        *_conv_stage(16),
        *_conv_stage(32, downsample=True),
    ]
)

# Auxiliary head attached to the output of `small_1`.
auxillary = tf.keras.Sequential(_classifier_head())

# Functional wrapper chaining `small_1` and `auxillary` into a single model.
inputs = tf.keras.Input(shape=(32,32,3))
x = small_1(inputs)
outputs = auxillary(x)

small_model_pretrain = tf.keras.Model(inputs=inputs, outputs=outputs,name="small_model_pretrain")

# Second block: takes (16, 16, 32) feature maps and carries its own head, so it
# emits class probabilities directly.
small_2 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(16,16,32)),
        *_conv_stage(64, downsample=True),
        *_classifier_head(),
    ]
)

In [None]:
# Running metrics for the predictions of block 1 (through its auxiliary head) and
# of block 2. They are module-level objects exposed through `ForcedNetSmall.metrics`.
mae_metric_1 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_1 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")

mae_metric_2 = tf.keras.metrics.MeanAbsoluteError(name="mae")
accuracy_metric_2 = tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")

class ForcedNetSmall(tf.keras.Model):
    """Two-block classifier in which only the second block is updated.

    Block 1 (with its auxiliary head) is run forward-only during training and
    is reported on but never updated here; it is presumably pretrained
    beforehand. Block 2 consumes block 1's features and is the only part whose
    weights receive gradients.

    Args:
        block_1: Feature extractor applied to the input images.
        block_2: Model applied to ``block_1``'s output. It must produce class
            probabilities itself, i.e. include its own classification head.
        auxillary: Head mapping ``block_1``'s features to class probabilities;
            used only to report block 1's loss and metrics.
    """

    def __init__(self, block_1, block_2, auxillary):
        super(ForcedNetSmall, self).__init__()

        self.auxillary_1 = auxillary

        self.model_1 = block_1

        self.model_2 = block_2

    def compile(self, optimizer, loss_fn):
        """Store the optimizer and the loss used by the custom `train_step`.

        Args:
            optimizer: Optimizer applied to block 2's trainable weights.
            loss_fn: Callable ``loss_fn(labels, predictions)`` returning the loss.
        """
        super(ForcedNetSmall, self).compile()
        self.optimizer = optimizer
        self.loss_fn = loss_fn

    @property
    def metrics(self):
        """Metric objects tracked by this model (both blocks)."""
        return [mae_metric_1, accuracy_metric_1,
                mae_metric_2, accuracy_metric_2]

    def call(self, images):
        """Run the full forward pass: block 1 features fed into block 2.

        The attributes set in `__init__` are `model_1` / `model_2`; block 2
        already returns the final class probabilities.
        """
        x = self.model_1(images)
        return self.model_2(x)

    def summary(self):
        """Print the summaries of both blocks and of the auxiliary head."""
        self.model_1.summary()
        self.model_2.summary()
        print("\nAuxillary Layers:")
        self.auxillary_1.summary()

    def train_step(self, data):
        """Train block 2 on one batch and report losses/metrics for both blocks.

        Args:
            data: Tuple-like batch whose first element is the images and whose
                second element is the labels.

        Returns:
            Dict with the per-block loss, MAE and categorical accuracy.
        """
        images = data[0]
        labels = data[1]

        # Block 1 and its auxiliary head run outside the tape: they are only
        # evaluated, so no gradient is recorded and their weights stay fixed.
        features = self.model_1(images)
        predictions_1 = self.auxillary_1(features)
        loss_1 = self.loss_fn(labels, predictions_1)

        # Second model part: block 2 is fed block 1's features (not the raw
        # images) and is the only part that is updated.
        with tf.GradientTape() as tape:
            predictions_2 = self.model_2(features)
            loss_2 = self.loss_fn(labels, predictions_2)

        grads = tape.gradient(loss_2, self.model_2.trainable_weights)
        self.optimizer.apply_gradients(
            zip(grads, self.model_2.trainable_weights)
        )

        mae_metric_1.update_state(labels, predictions_1)
        accuracy_metric_1.update_state(labels, predictions_1)

        mae_metric_2.update_state(labels, predictions_2)
        accuracy_metric_2.update_state(labels, predictions_2)

        return {"Block_1_Loss": loss_1,
                "Block_2_Loss": loss_2,

                "Block_1_MAE": mae_metric_1.result(),
                "Block_2_MAE": mae_metric_2.result(),

                "Block_1_Accuracy": accuracy_metric_1.result(),
                "Block_2_Accuracy": accuracy_metric_2.result(), }

In [None]:
# Train the vanilla (end-to-end) baseline and log it to its own TensorBoard run.
run_logdir = get_run_logdir("SmallModelVanilla_Cifar100")

# `callback` is already a list of callbacks: learning-rate reduction when the
# training loss plateaus for 7 epochs, plus TensorBoard logging.
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
            tf.keras.callbacks.TensorBoard(run_logdir)]

metrics = [tf.keras.metrics.MeanAbsoluteError(name="mae"),
            tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")]

mini_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), metrics=metrics)

# Pass the list itself; wrapping it in another list only works as long as Keras
# flattens nested callback structures.
history = mini_model.fit(train_dataset, epochs=200, callbacks=callback)

In [None]:
# --- Stage 1: pretrain block 1 together with its auxiliary head -----------------
metrics = [tf.keras.metrics.MeanAbsoluteError(name="mae"),
            tf.keras.metrics.CategoricalAccuracy(name="categorical_accuracy")]

run_logdir = get_run_logdir("SmallForced_pretrain_Cifar100")

callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7),
            tf.keras.callbacks.TensorBoard(run_logdir)]
# `metrics` and `callback` are already flat lists, so they are passed as-is
# instead of being wrapped in a second list.
small_model_pretrain.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003), metrics=metrics
)

history = small_model_pretrain.fit(train_dataset, epochs=125, callbacks=callback)

# --- Stage 2: train block 2 on top of block 1 -----------------------------------
run_logdir = get_run_logdir("SmallForcedLearner_Cifar100")

ForcedSmall = ForcedNetSmall(small_1, small_2, auxillary)

# The plateau monitor uses the "Block_2_Loss" key returned by
# ForcedNetSmall.train_step.
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='Block_2_Loss', patience=7),
            tf.keras.callbacks.TensorBoard(run_logdir)]
ForcedSmall.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
    loss_fn=tf.keras.losses.CategoricalCrossentropy(),
)

history = ForcedSmall.fit(train_dataset, epochs=75, callbacks=callback)

In [4]:
# Reload the TensorBoard notebook extension and open the dashboard on the log
# directory of one SmallModelVanilla_Cifar100 run.
# NOTE(review): the path is absolute and machine-specific; it will not resolve on
# another machine or for a new run.
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_07_20 SmallModelVanilla_Cifar100'

In [2]:
# Reload the TensorBoard notebook extension and open the dashboard on the log
# directory of one SmallForced_pretrain_Cifar100 run.
# NOTE(review): the path is absolute and machine-specific; it will not resolve on
# another machine or for a new run.
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_20_54 SmallForced_pretrain_Cifar100'

Reusing TensorBoard on port 6007 (pid 49115), started 0:00:50 ago. (Use '!kill 49115' to kill it.)

In [3]:
# Reload the TensorBoard notebook extension and open the dashboard on the log
# directory of one SmallForcedLearner_Cifar100 run.
# NOTE(review): the path is absolute and machine-specific; it will not resolve on
# another machine or for a new run.
%reload_ext tensorboard
%tensorboard --logdir '/home/jack/ml/my_env/my_environment/my_environment/my_logs/run_2021_12_29-20_25_03 SmallForcedLearner_Cifar100'

Reusing TensorBoard on port 6008 (pid 49207), started 0:00:49 ago. (Use '!kill 49207' to kill it.)