In [1]:
import tensorflow as tf
batch_size = 128
(x_train, Y_train), (x_test, Y_test) = tf.keras.datasets.cifar10.load_data()

In [2]:
import numpy as np
all_digits = np.concatenate([x_train, x_test])
all_output = np.concatenate([Y_train, Y_test])
all_output_one_hot = tf.one_hot(np.squeeze(all_output), 10)
all_digits = all_digits.astype("float32") / 255.0

2021-12-02 21:07:02.299282: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-02 21:07:02.302280: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-02 21:07:02.302550: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-02 21:07:02.302977: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [3]:
all_digits = np.reshape(all_digits, (-1, 32, 32, 3))
dataset = tf.data.Dataset.from_tensor_slices((all_digits, all_output_one_hot))
dataset = dataset.shuffle(buffer_size=1024)
dataset = dataset.batch(batch_size).prefetch(1)

In [4]:
DATASET_SIZE=60000
train_size = int(0.7 * DATASET_SIZE)
val_size = int(0.15 * DATASET_SIZE)
test_size = int(0.15 * DATASET_SIZE)

full_dataset = dataset
train_dataset = full_dataset.take(train_size)
test_dataset = full_dataset.skip(train_size)
val_dataset = test_dataset.skip(test_size)
test_dataset = test_dataset.take(test_size)

In [5]:
len(list(train_dataset))

469

In [6]:
model_16 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32,32,3)),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
        tf.keras.layers.Conv2D(16, 3, padding='same',activation='relu'),
    ], name = "block_1"
)

base_model_1 = model_16

model_32 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(32,32,16)),
        tf.keras.layers.Conv2D(32, 3, strides=(2,2), padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(32, 3, padding="same", activation='relu'),
    ], name = "block_2"
)

base_model_2 = model_32

model_64 = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(16,16,32)),
        tf.keras.layers.Conv2D(64, 3, strides=(2,2), padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.Conv2D(64, 3, padding="same", activation='relu'),
        tf.keras.layers.AveragePooling2D(pool_size=(8,8), strides=(1, 1), padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation="softmax")
    ], name = "block_3"
)

base_model_3 = model_64

In [7]:
class ForcedNet20(tf.keras.Model):
    def __init__(self, block_1, block_2, block_3):
        super(ForcedNet20, self).__init__()
        self.block_1 = block_1
        self.output_1 = tf.keras.Sequential(
            [
                tf.keras.layers.AveragePooling2D(pool_size=(32,32), strides=(1, 1), padding='valid'),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(10, activation="softmax")
            ]
        )
        
        self.block_2 = block_2
        self.output_2 = tf.keras.Sequential(
            [
                tf.keras.layers.AveragePooling2D(pool_size=(16,16), strides=(1, 1), padding='valid'),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(10, activation="softmax")
            ]
        )
        
        self.block_3 = block_3
    def compile(self, optimizer, loss_fn):
        super(ForcedNet20, self).compile()
        self.optimizer = optimizer
        self.loss_fn = loss_fn
    
    def train_step(self, data):
        images = data[0]
        labels = data[1]
        
        with tf.GradientTape(persistent=True) as tape:
            x = self.block_1(images)
            predictions_1 = self.output_1(x)
            
            x = self.block_2(x)
            predictions_2 = self.output_2(x)

            predictions_3 = self.block_3(x)
            
            loss_1 = self.loss_fn(labels, predictions_1)
            loss_2 = self.loss_fn(labels, predictions_2)
            loss_3 = self.loss_fn(labels, predictions_3)
        
        grads_1 = tape.gradient(loss_1, self.block_1.trainable_weights)
        grads_2 = tape.gradient(loss_2, self.block_2.trainable_weights)
        grads_final = tape.gradient(loss_3, self.block_3.trainable_weights)
        
        grads_1_output = tape.gradient(loss_1, self.output_1.trainable_weights)
        grads_2_output = tape.gradient(loss_2, self.output_2.trainable_weights)
                                
        self.optimizer.apply_gradients(
            zip(grads_1, self.block_1.trainable_weights,)
        )
        self.optimizer.apply_gradients(
            zip(grads_1_output, self.output_1.trainable_weights)
        )
        
        self.optimizer.apply_gradients(
            zip(grads_2, self.block_2.trainable_weights)
        )
        
        self.optimizer.apply_gradients(
            zip(grads_2_output, self.output_2.trainable_weights)
        )
        
        self.optimizer.apply_gradients(
            zip(grads_final, self.block_3.trainable_weights)
        )
        
        return {"Block_1_Loss": loss_1, "Block_2_Loss": loss_2, "Block_3_Loss": loss_3}

In [8]:
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

class Metrics(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, batch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))

        y_val = np.argmax(y_val, axis=1)
        y_predict = np.argmax(y_predict, axis=1)

        self._data.append({
            'mse': mean_squared_error(y_val, y_predict),
        })
        self._data.append({
            'mae': mean_absolute_error(y_val, y_predict),
        })
        self._data.append({
            'mape': mean_absolute_percentage_error(y_val, y_predict),
        })
        self._data.append({
            'accuracy': accuracy_score(y_val, y_predict),
        })
        return

    def get_data(self):
        return self._data

metrics = Metrics()

root_logdir = os.path.join(os.curdir, "my_logs")

def get_run_logdir(model_name):
    import time
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S") + " " + model_name
    return os.path.join(root_logdir, run_id)

run_logdir = get_run_logdir("ForcedLearning20")
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='Block_3_Loss', patience=20, min_delta=0.0001,), tf.keras.callbacks.TensorBoard(run_logdir)]

ForcedLearner = ForcedNet20(model_16, model_32, model_64)
ForcedLearner.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    loss_fn=tf.keras.losses.CategoricalCrossentropy(),
)

history = ForcedLearner.fit(dataset, epochs=10000, callbacks=[callback])

Epoch 1/10000


2021-12-02 21:07:06.571713: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200
2021-12-02 21:07:07.568133: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000

KeyboardInterrupt: 

In [9]:
inputs = tf.keras.Input(shape=(32,32,3))
x = base_model_1(inputs)
x = base_model_2(x)
outputs = base_model_3(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs, name="VanillaModel")

In [10]:
model_16.trainable_weights

[<tf.Variable 'conv2d/kernel:0' shape=(3, 3, 3, 16) dtype=float32, numpy=
 array([[[[-0.05375624,  0.13188165,  0.14402987, -0.08244733,
            0.04991871, -0.00195808,  0.12058412, -0.06695587,
            0.15695581, -0.09722174, -0.20612259,  0.10180074,
           -0.13146573, -0.0185107 ,  0.0540503 , -0.02902461],
          [ 0.04679592,  0.07487708,  0.04507318,  0.19232981,
           -0.12900034,  0.18260531,  0.17626868,  0.06835281,
           -0.03841044, -0.11267674, -0.06316394, -0.00789947,
            0.00557988,  0.16408741, -0.16328351, -0.02224325],
          [ 0.11728892,  0.06132431, -0.11336042, -0.1313126 ,
            0.02850037,  0.08434359,  0.08300991, -0.02286066,
            0.08794345,  0.02848215, -0.12244668, -0.09064846,
            0.09030225,  0.18390678,  0.10081933,  0.01093384]],
 
         [[-0.14381403, -0.15227799,  0.07158376, -0.11439311,
           -0.19756031, -0.07013111, -0.15156724, -0.05521821,
           -0.14552446, -0.10116047,  

In [11]:
model.trainable_weights

[<tf.Variable 'conv2d/kernel:0' shape=(3, 3, 3, 16) dtype=float32, numpy=
 array([[[[-0.05375624,  0.13188165,  0.14402987, -0.08244733,
            0.04991871, -0.00195808,  0.12058412, -0.06695587,
            0.15695581, -0.09722174, -0.20612259,  0.10180074,
           -0.13146573, -0.0185107 ,  0.0540503 , -0.02902461],
          [ 0.04679592,  0.07487708,  0.04507318,  0.19232981,
           -0.12900034,  0.18260531,  0.17626868,  0.06835281,
           -0.03841044, -0.11267674, -0.06316394, -0.00789947,
            0.00557988,  0.16408741, -0.16328351, -0.02224325],
          [ 0.11728892,  0.06132431, -0.11336042, -0.1313126 ,
            0.02850037,  0.08434359,  0.08300991, -0.02286066,
            0.08794345,  0.02848215, -0.12244668, -0.09064846,
            0.09030225,  0.18390678,  0.10081933,  0.01093384]],
 
         [[-0.14381403, -0.15227799,  0.07158376, -0.11439311,
           -0.19756031, -0.07013111, -0.15156724, -0.05521821,
           -0.14552446, -0.10116047,  

In [None]:
run_logdir = get_run_logdir("VanillaNetwork")
callback = [tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=7), tf.keras.callbacks.TensorBoard(run_logdir)]
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003), metrics=['mse', 'mae', 'mape', 'accuracy'])
history = model.fit(dataset, validation_data=(val_dataset), epochs=10000, callbacks=[callback])