In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import math
import datetime

In [2]:
# model of network
class TwinModel(tf.keras.Model):
    """Feed-forward network that scores a *pair* of inputs.

    Both inputs are sent through the same two hidden Dense(128) layers (the
    weights are shared), the two resulting feature vectors are concatenated and
    a single Dense output layer produces the prediction.

    All hyperparameters are constructor arguments so the same class can be
    reused for differently shaped tasks.
    """

    def __init__(self, hidden_activation, output_activation, optimizer, accuracy, loss_function, depth):
        """Create layers, optimizer, loss and metrics.

        Args:
            hidden_activation: activation used by both hidden layers.
            output_activation: activation used by the output layer.
            optimizer: optimizer whose ``apply_gradients`` is used in ``train_step``.
            accuracy: metric object updated with ``(targets, predictions)``.
            loss_function: callable ``loss_function(targets, predictions)``.
            depth: number of units of the output layer.
        """
        # initialize superclass
        super().__init__()

        # initialize optimizer, metrics and loss function
        self.optimizer = optimizer
        # Position 0 is the running mean of the loss; every metric after it is
        # updated with (targets, predictions).
        self.metrics_list = [
            tf.keras.metrics.Mean(name="mean"),
            accuracy,
        ]
        self.loss_function = loss_function

        # define layers (layer1/layer2 are applied to both inputs)
        self.layer1 = tf.keras.layers.Dense(128, activation=hidden_activation)
        self.layer2 = tf.keras.layers.Dense(128, activation=hidden_activation)
        self.output_layer = tf.keras.layers.Dense(depth, activation=output_activation)

    def call(self, images, training=False):
        """Forward pass for a pair of inputs.

        Args:
            images: pair ``(img1, img2)``; the concatenation along axis 1
                assumes each is a batched 2-D tensor - TODO confirm with caller.
            training: accepted for Keras compatibility; not forwarded to any
                layer of this model.

        Returns:
            Output of the final Dense layer for the concatenated features.
        """
        img1, img2 = images

        # forward pass with first image
        x1 = self.layer1(img1)
        x1 = self.layer2(x1)

        # forward pass with second image (same layers, hence shared weights)
        x2 = self.layer1(img2)
        x2 = self.layer2(x2)

        combined_x = tf.concat([x1, x2], axis=1)

        return self.output_layer(combined_x)

    def reset_metrics(self):
        """Reset the loss metric and all accuracy metrics."""
        for metric in self.metrics_list:
            # reset_state() replaces the deprecated reset_states() spelling
            metric.reset_state()

    def _compute_loss(self, targets, predictions):
        # Task loss plus any additional losses registered on the model/layers.
        return self.loss_function(targets, predictions) + tf.reduce_sum(self.losses)

    def _update_metrics(self, loss, targets, predictions):
        # Feed the loss into the mean metric and (targets, predictions) into the rest.
        self.metrics_list[0].update_state(loss)
        for metric in self.metrics_list[1:]:
            metric.update_state(targets, predictions)

        # Dictionary mapping metric names to their current value
        return {m.name: m.result() for m in self.metrics_list}

    @tf.function
    def train_step(self, data):
        """Run one optimization step.

        Computes the loss under a gradient tape, applies the gradients to all
        trainable variables and updates the metrics.

        Args:
            data: triple ``(img1, img2, targets)``.

        Returns:
            Dict mapping each metric name to its current result.
        """
        img1, img2, targets = data

        with tf.GradientTape() as tape:
            predictions = self((img1, img2), training=True)
            loss = self._compute_loss(targets, predictions)

        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return self._update_metrics(loss, targets, predictions)

    @tf.function
    def test_step(self, data):
        """Evaluate one batch without updating any weights.

        Args:
            data: triple ``(img1, img2, targets)``.

        Returns:
            Dict mapping each metric name to its current result.
        """
        img1, img2, targets = data
        predictions = self((img1, img2), training=False)
        loss = self._compute_loss(targets, predictions)

        return self._update_metrics(loss, targets, predictions)

In [3]:
# Logging setup: each run writes below logs/<config_name>/<timestamp>/,
# with separate sub-directories for training and validation metrics.
config_name = "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# training metrics: target directory and its summary writer
train_log_path = f"logs/{config_name}/{current_time}/train"
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# validation metrics: target directory and its summary writer
val_log_path = f"logs/{config_name}/{current_time}/val"
val_summary_writer = tf.summary.create_file_writer(val_log_path)

2022-11-28 21:30:40.689537: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-28 21:30:40.693131: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
import pprint
import tqdm

def _write_epoch_summaries(model, summary_writer, epoch):
    # Write the current value of every model metric once, tagged with the epoch.
    with summary_writer.as_default():
        for metric in model.metrics:
            tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)


def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer):
    """Train and validate ``model`` for a number of epochs.

    For every epoch the loop runs ``model.train_step`` on each element of
    ``train_ds`` and ``model.test_step`` on each element of ``val_ds``. After
    each of the two passes the accumulated metrics are written to the
    matching summary writer (one value per metric and epoch), printed, and
    reset via ``model.reset_metrics()``.

    Args:
        model: object providing ``train_step``, ``test_step``, ``metrics``
            and ``reset_metrics``.
        train_ds: iterable of training batches passed to ``train_step``.
        val_ds: iterable of validation batches passed to ``test_step``.
        epochs: number of epochs to run.
        train_summary_writer: summary writer for the training metrics.
        val_summary_writer: summary writer for the validation metrics.
    """
    for epoch in range(epochs):
        print(f"Epoch {epoch}:")

        # Training:
        # start from an empty dict so an empty dataset does not leave `metrics` unbound
        metrics = {}
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data)

        # Log once per epoch: the metrics are running aggregates, so their value
        # after the last batch is the epoch result. Writing after every batch
        # would put many points on the same step.
        _write_epoch_summaries(model, train_summary_writer, epoch)

        # print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics (requires a reset_metrics method in the model)
        model.reset_metrics()

        # Validation:
        metrics = {}
        for data in val_ds:
            metrics = model.test_step(data)

        _write_epoch_summaries(model, val_summary_writer, epoch)

        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics
        model.reset_metrics()
        print("\n")

In [5]:
# 2. write function to create the dataset that we want
def preprocess(data, batch_size, target_function, depth):
    """Turn a dataset of ``(image, label)`` examples into batches of image pairs.

    Steps:
      1. Flatten every image to a 1-D float32 vector and rescale it with
         ``x / 128 - 1``.
      2. Zip two independently shuffled copies of the dataset so that every
         example consists of two images.
      3. Replace the two labels by ``target_function(label1, label2)``.
      4. Encode the target and batch/prefetch the result.

    Args:
        data: ``tf.data.Dataset`` yielding ``(image, label)`` tuples.
        batch_size: number of pairs per batch.
        target_function: callable combining the two labels into an integer
            class index in ``[0, depth)`` (for ``depth == 1``: a 0/1 label).
        depth: width of the encoded target. ``1`` produces a single 0/1 float
            per example, anything else a one-hot vector of that length.

    Returns:
        Batched, prefetched dataset of ``(image1, image2, target)`` triples.
    """
    data = data.map(lambda x, t: ((tf.cast(tf.reshape(x, (-1,)), tf.float32) / 128.) - 1., t))

    # we want to have two images in each example
    zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), data.shuffle(2000)))

    # map ((x1, y1), (x2, y2)) to (x1, x2, target_function(y1, y2))
    zipped_ds = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], target_function(x1[1], x2[1])))

    if depth == 1:
        # A one-hot vector of depth 1 is [1.] for class 0 and [0.] for class 1,
        # i.e. it would invert a binary label. Emit the 0/1 label itself,
        # keeping the per-example shape (1,) that a single output unit needs.
        zipped_ds = zipped_ds.map(
            lambda x1, x2, target: (x1, x2, tf.reshape(tf.cast(target, tf.float32), (1,)))
        )
    else:
        zipped_ds = zipped_ds.map(lambda x1, x2, target: (x1, x2, tf.one_hot(target, depth=depth)))

    # batch the dataset
    zipped_ds = zipped_ds.batch(batch_size)
    # prefetch
    zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)

    return zipped_ds

In [6]:
# Target functions: combine the two digit labels of a pair into one integer target.
def a(x1, x2):
    """Subtask a: 1 if the two digits sum to at least 5, otherwise 0."""
    return int(x1 + x2 >= 5)


def b(x1, x2):
    """Subtask b: class index of the difference ``x1 - x2``.

    For digits 0-9 the difference lies in [-9, 9]. It is shifted by 9 so that
    the result is a valid index in [0, 18] for a one-hot vector of depth 19;
    a negative index would be encoded as an all-zero vector.
    """
    return x1 - x2 + 9


def subtask(option, epochs=10):
    """Build, train and return the twin model for one of the two subtasks.

    Args:
        option: ``"a"`` (binary target, sigmoid output, Adam, binary
            cross-entropy) or ``"b"`` (19-class target, softmax output, SGD,
            categorical cross-entropy).
        epochs: number of epochs passed to ``training_loop``.

    Returns:
        Tuple ``(model, train_ds, val_ds)`` so that later cells can save the
        model or continue training on the same datasets.

    Raises:
        ValueError: if ``option`` is neither ``"a"`` nor ``"b"``.
    """
    # Fail early with a clear message instead of hitting unbound locals below.
    if option not in ("a", "b"):
        raise ValueError(f"Unknown subtask option {option!r}; expected 'a' or 'b'.")

    # get mnist from tensorflow_datasets
    train_ds, val_ds = tfds.load("mnist", split=["train", "test"], as_supervised=True)

    # initialize the parameters belonging to the requested subtask
    if option == "a":
        hidden_activation = tf.nn.sigmoid
        output_activation = tf.nn.sigmoid
        optimizer = tf.keras.optimizers.Adam(0.001)
        loss_function = tf.keras.losses.BinaryCrossentropy()
        depth = 1
        target_function = a
        accuracy = tf.keras.metrics.BinaryAccuracy(name="acc")
    else:
        hidden_activation = tf.nn.relu
        output_activation = tf.nn.softmax
        optimizer = tf.keras.optimizers.SGD(0.001)
        loss_function = tf.keras.losses.CategoricalCrossentropy()
        depth = 19
        target_function = b
        accuracy = tf.keras.metrics.CategoricalAccuracy(name="acc")

    train_ds = preprocess(train_ds, batch_size=32, target_function=target_function, depth=depth)
    val_ds = preprocess(val_ds, batch_size=32, target_function=target_function, depth=depth)

    model = TwinModel(hidden_activation, output_activation, optimizer, accuracy, loss_function, depth)

    training_loop(
        model=model,
        train_ds=train_ds,
        val_ds=val_ds,
        epochs=epochs,
        train_summary_writer=train_summary_writer,
        val_summary_writer=val_summary_writer,
    )

    return model, train_ds, val_ds


# Keep the trained model and the datasets at notebook scope for later cells.
model, train_ds, val_ds = subtask("a")

print("Training finished.")

Epoch 0:


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:45<00:00, 17.71it/s]


['mean: 0.210931658744812', 'acc: 0.9189833402633667']
['val_mean: 0.15154871344566345', 'val_acc: 0.9427000284194946']


Epoch 1:


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:28<00:00, 21.08it/s]


['mean: 0.13676755130290985', 'acc: 0.9485499858856201']
['val_mean: 0.12123598903417587', 'val_acc: 0.9559999704360962']


Epoch 2:


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [02:21<00:00, 13.21it/s]


['mean: 0.11665378510951996', 'acc: 0.9572833180427551']
['val_mean: 0.11089668422937393', 'val_acc: 0.9605000019073486']


Epoch 3:


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1875/1875 [01:31<00:00, 20.43it/s]


['mean: 0.10609745234251022', 'acc: 0.9624500274658203']
['val_mean: 0.10437672585248947', 'val_acc: 0.9666000008583069']


Epoch 4:


 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍    | 1831/1875 [01:28<00:02, 20.61it/s]


KeyboardInterrupt: 

# Saving and loading a subclassed model

Because training deep neural networks can take multiple days, weeks or even months, we want to save checkpoints in between. This is especially useful if you use Google Colab and you save the model directly to your Google Drive folder. That way you don't lose any progress if your runtime gets closed.

In [None]:
# Save the weights of the trained model under a name derived from the config,
# then restore them into a fresh model instance and continue training.
#
# NOTE(review): this cell expects `model`, `optimizer`, `loss_function`,
# `train_ds` and `val_ds` to exist at notebook scope - confirm that the
# earlier cells expose them before running.

# save the model with a meaningful name
model.save_weights(f"saved_model_{config_name}", save_format="tf")

# load the model:
# instantiate a new model to load the weights into
# NOTE(review): `FFN` is not defined in the cells shown in this notebook; the
# model class defined here is `TwinModel`, whose constructor takes
# (hidden_activation, output_activation, optimizer, accuracy, loss_function,
# depth) - presumably that class is meant; verify.
loaded_model = FFN(optimizer, loss_function)

# build the model (left disabled; the example input shape below is an image
# shape, not a pair of inputs as TwinModel.call expects)
#inp= tf.keras.Input((28,28,1))
#loaded_model(inp)

# load the model weights to continue training.
loaded_model.load_weights(f"saved_model_{config_name}");

# continue training (but: optimizer state is lost)

# run the training loop with the restored model
training_loop(model=loaded_model, 
                train_ds=train_ds, 
                val_ds=val_ds, 
                epochs=10, 
                train_summary_writer=train_summary_writer, 
                val_summary_writer=val_summary_writer)

Epoch 0:


 51%|█████▏    | 964/1875 [00:14<00:13, 66.14it/s]


KeyboardInterrupt: ignored