In [117]:
import tensorflow as tf
import tensorflow_datasets as tfds
import datetime

# magic line only needed in jupyter notebooks!
%load_ext tensorboard 

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [132]:
# load data with as_supervised=True to import labels
mnist = tfds.load("mnist", split =["train","test"], as_supervised=True)
train_ds = mnist[0]
val_ds = mnist[1]

In [142]:
# 2. write function to create the dataset that we want
def preprocess(data, batch_size, task):
    # image should be float
    data = data.map(lambda x, t: (tf.cast(x, float), t))
    # image should be flattened
    data = data.map(lambda x, t: (tf.reshape(x, (-1,)), t))
    # image vector will here have values between -1 and 1
    data = data.map(lambda x,t: ((x/128.)-1., t))
    # we want to have two mnist images in each example
    
    # this leads to a single example being ((x1,y1),(x2,y2))
    data = tf.data.Dataset.zip((data.shuffle(2000), data.shuffle(2000)))
    
    # map ((x1,y1),(x2,y2)) to (x1,x2, y1==y2*) *boolean
    if(task==1):
        data = data.map(lambda x1, x2: (x1[0], x2[0], x1[1]+x2[1]>=5))
        # transform boolean target to int
        data = data.map(lambda x1, x2, t: (x1,x2, tf.cast(t, tf.int32)))
    else:
        # possible 19 results for a-b [-9, 9], encode as one hot vector for classification
        data = data.map(lambda x1, x2: (x1[0], x2[0], tf.one_hot((x1[1]-x2[1]), 19, dtype=tf.float32)))
    
    # batch the dataset
    data = data.batch(batch_size)
    # prefetch
    data = data.prefetch(tf.data.AUTOTUNE)
    return data

train_ds1 = preprocess(train_ds, batch_size=32, task=1) #train_ds.apply(preprocess)
val_ds1 = preprocess(val_ds, batch_size=32, task=1) #val_ds.apply(preprocess)

train_ds2 = preprocess(train_ds, batch_size=32, task=2) #train_ds.apply(preprocess)+
val_ds2 = preprocess(val_ds, batch_size=32, task=2) #val_ds.apply(preprocess)
print(train_ds2)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 784), dtype=tf.float32, name=None), TensorSpec(shape=(None, 19), dtype=tf.float32, name=None))>


In [134]:
class BinaryModel(tf.keras.Model):

    # 1. constructor
    def __init__(self):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.BinaryAccuracy(name="subtask1_accuracy"), tf.keras.metrics.Mean(name="subtask1_loss")]
        
        self.optimizer = tf.keras.optimizers.Adam()
        
        self.loss_function = tf.keras.losses.BinaryCrossentropy()

        # layers to encode the images (both layers used for both images)
        self.dense1 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.dense3 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.out_layer = tf.keras.layers.Dense(1,activation=tf.nn.sigmoid)
        
    # 2. call method (forward computation)
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        return self.out_layer(combined_x)

    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    # 5. train step method
    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}

    # 6. test_step method
    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}

In [143]:
class CategoricalModel(tf.keras.Model):

    # 1. constructor
    def __init__(self):
        super().__init__()
        # inherit functionality from parent class

        # optimizer, loss function and metrics
        self.metrics_list = [tf.keras.metrics.CategoricalAccuracy(name="subtask2_accuracy"), tf.keras.metrics.Mean(name="subtask2_loss")]
        
        self.optimizer = tf.keras.optimizers.Adam()
        
        self.loss_function = tf.keras.losses.CategoricalCrossentropy()

        # layers to encode the images (both layers used for both images)
        self.dense1 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        
        self.dense3 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        
        self.out_layer = tf.keras.layers.Dense(19,activation=tf.nn.softmax)
        
    # 2. call method (forward computation)
    def call(self, images, training=False):
        img1, img2 = images
        
        img1_x = self.dense1(img1)
        img1_x = self.dense2(img1_x)
        
        img2_x = self.dense1(img2)
        img2_x = self.dense2(img2_x)
        
        combined_x = tf.concat([img1_x, img2_x ], axis=1)
        combined_x = self.dense3(combined_x)
        return self.out_layer(combined_x)

    # 3. metrics property
    @property
    def metrics(self):
        return self.metrics_list
        # return a list with all metrics in the model

    # 4. reset all metrics objects
    def reset_metrics(self):
        for metric in self.metrics:
            metric.reset_states()

    # 5. train step method
    @tf.function
    def train_step(self, data):
        img1, img2, label = data
        
        with tf.GradientTape() as tape:
            output = self((img1, img2), training=True)
            loss = self.loss_function(label, output)
            
        gradients = tape.gradient(loss, self.trainable_variables)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update the state of the metrics according to loss
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        # return a dictionary with metric names as keys and metric results as values
        return {m.name : m.result() for m in self.metrics}

    # 6. test_step method
    @tf.function
    def test_step(self, data):
        img1, img2, label = data
        # same as train step (without parameter updates)
        output = self((img1, img2), training=False)
        loss = self.loss_function(label, output)
        self.metrics[0].update_state(label, output)
        self.metrics[1].update_state(loss)
        
        return {m.name : m.result() for m in self.metrics}

In [146]:
def create_summary_writers(config_name):
    
    # Define where to save the logs
    # along with this, you may want to save a config file with the same name so you know what the hyperparameters were used
    # alternatively make a copy of the code that is used for later reference
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    train_log_path = f"logs/{config_name}/{current_time}/train"
    val_log_path = f"logs/{config_name}/{current_time}/val"

    # log writer for training metrics
    train_summary_writer = tf.summary.create_file_writer(train_log_path)

    # log writer for validation metrics
    val_summary_writer = tf.summary.create_file_writer(val_log_path)
    
    return train_summary_writer, val_summary_writer

train_summary_writer_subtask1, val_summary_writer_subtask1 = create_summary_writers(config_name="subtask_1_RUN1")
train_summary_writer_subtask2, val_summary_writer_subtask2 = create_summary_writers(config_name="subtask_2_RUN3")


In [136]:
import tqdm
def training_loop(model, train_ds, val_ds, start_epoch,
                  epochs, train_summary_writer, 
                  val_summary_writer, save_path):

    # 1. iterate over epochs
    for e in range(start_epoch, epochs):

        # 2. train steps on all batches in the training data
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data) # we save to metrics because we want to print it and tensorboard

        ## Training data
        # 3. log and print training metrics
        with train_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
        
        #print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])
        
        # 4. reset metric objects for next epch
        model.reset_metrics()

        #####################################################

        ## Validation data
        # 5. evaluate on validation data
        for data in val_ds:
            metrics = model.test_step(data)
        
        # 6. log validation metrics
        with val_summary_writer.as_default():
            # for scalar metrics:
            for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=e)
            # alternatively, log metrics individually (allows for non-scalar metrics such as tf.keras.metrics.MeanTensor)
            # e.g. tf.summary.image(name="mean_activation_layer3", data = metrics["mean_activation_layer3"],step=e)
            
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])
        # 7. reset metric objects
        model.reset_metrics()
        
    # 8. save model weights if save_path is given
    if save_path:
        model.save_weights(save_path)

In [124]:
# open the tensorboard logs
%tensorboard --logdir logs/

In [145]:
# 1. instantiate model
model_subtask1 = BinaryModel()
model_subtask2 = CategoricalModel()

# 2. choose a path to save the weights
save_path_subtask1 = "subtask1_trained_model_RUN1"
save_path_subtask2 = "subtask2_trained_model_RUN3"

# 3. pass arguments to training loop function
training_loop(model=model_subtask2,
    train_ds=train_ds2,
    val_ds=val_ds2,
    start_epoch=0,
    epochs=10,
    train_summary_writer=train_summary_writer_subtask2,
    val_summary_writer=val_summary_writer_subtask2,
    save_path=save_path_subtask2)

  0%|          | 0/1875 [00:00<?, ?it/s]2022-11-26 17:09:41.251733: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
100%|██████████| 1875/1875 [00:20<00:00, 93.07it/s] 
2022-11-26 17:10:01.221584: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


['accuracy: 0.12155000120401382', 'loss: 4325623.5']


2022-11-26 17:10:03.827548: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


['val_accuracy: 0.021400000900030136', 'val_loss: 17348446.0']


100%|██████████| 1875/1875 [00:19<00:00, 96.18it/s] 


['accuracy: 0.1200999990105629', 'loss: 49680700.0']
['val_accuracy: 0.08980000764131546', 'val_loss: 74761424.0']


100%|██████████| 1875/1875 [00:18<00:00, 99.48it/s] 


['accuracy: 0.12183333188295364', 'loss: 152950960.0']
['val_accuracy: 0.5485000014305115', 'val_loss: 270329760.0']


100%|██████████| 1875/1875 [00:19<00:00, 96.55it/s] 


['accuracy: 0.11883333325386047', 'loss: 319726496.0']
['val_accuracy: 0.050700001418590546', 'val_loss: 271315840.0']


100%|██████████| 1875/1875 [00:19<00:00, 98.49it/s] 


['accuracy: 0.12071666866540909', 'loss: 551154944.0']
['val_accuracy: 0.07810000330209732', 'val_loss: 830948672.0']


100%|██████████| 1875/1875 [00:19<00:00, 97.03it/s] 


['accuracy: 0.12076666951179504', 'loss: 888380032.0']
['val_accuracy: 0.03350000083446503', 'val_loss: 603351616.0']


100%|██████████| 1875/1875 [00:18<00:00, 99.56it/s] 


['accuracy: 0.12248333543539047', 'loss: 1281678336.0']
['val_accuracy: 0.08050000667572021', 'val_loss: 1202204672.0']


100%|██████████| 1875/1875 [00:18<00:00, 99.64it/s] 


['accuracy: 0.12323333323001862', 'loss: 1773709440.0']
['val_accuracy: 0.04780000075697899', 'val_loss: 2012207616.0']


100%|██████████| 1875/1875 [00:18<00:00, 99.45it/s] 


['accuracy: 0.12098333239555359', 'loss: 2377750528.0']
['val_accuracy: 0.07930000126361847', 'val_loss: 3078215936.0']


100%|██████████| 1875/1875 [00:19<00:00, 98.10it/s] 


['accuracy: 0.12301667034626007', 'loss: 3115229952.0']
['val_accuracy: 0.08670000731945038', 'val_loss: 4714269696.0']


In [None]:
#load the model:
fresh_model = TwinMNISTModel()

# build the model's parameters by calling it on input
for img1,img2,label in train_ds:
    fresh_model((img1,img2));
    break

# load the saved weights
model = fresh_model.load_weights(save_path)
