In [10]:
import tensorflow as tf
import tensorflow_datasets as tfds
import math
import datetime 

In [11]:
# model of network
class TwinModel(tf.keras.Model):

    # hyperparameters are passed for instantiation 
    # to make the model esasily adjustable 
    def __init__(self, hidden_activation, output_activation, optimizer, accuracy, loss_function, depth):
        
        # initialzie superclass
        super().__init__()
        
        
        # initialize optimizer, metric and loss function
        self.optimizer = optimizer
        self.metrics_list = [
            tf.keras.metrics.Mean(name="mean"),
            accuracy
            ] 
        self.loss_function = loss_function

        # define layers
        self.layer1 = tf.keras.layers.Dense(128, activation=hidden_activation)
        self.layer2 = tf.keras.layers.Dense(128, activation=hidden_activation)
        self.output_layer = tf.keras.layers.Dense(depth, activation=output_activation)
    
    # passes signal through network and calculates output
    def call(self, images, training=False):
                
        img1, img2 = images
        
        # forward pass with first image
        x1 = self.layer1(img1)
        x1 = self.layer2(x1)

        # forward pass with second image
        x2 = self.layer1(img2)
        x2 = self.layer2(x2)     

        combined_x = tf.concat([x1, x2], axis=1)

        return self.output_layer(combined_x)
           
    def reset_metrics(self):
        
        for metric in self.metrics:
            metric.reset_states()

    # performs trainstep by calculating loss and gradients and applying the gradients to weights
    # and biases (trainable variables)      
    @tf.function
    def train_step(self, data):
        
        img1, img2, targets = data
        
        with tf.GradientTape() as tape:
            predictions = self((img1, img2), training=True)
            
            loss = self.loss_function(targets, predictions) + tf.reduce_sum(self.losses)
        
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        
        # update loss metric
        self.metrics[0].update_state(loss)
        
        # for all metrics except loss, update states (accuracy etc.)
        for metric in self.metrics[1:]:
            metric.update_state(targets,predictions)

        # Return a dictionary mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

    @tf.function
    def test_step(self, data):

        img1, img2, targets = data
        predictions = self((img1, img2), training=False)
        loss = self.loss_function(targets, predictions) + tf.reduce_sum(self.losses)

        self.metrics[0].update_state(loss)
        # for accuracy metrics:
        for metric in self.metrics[1:]:
            metric.update_state(targets, predictions)

        return {m.name: m.result() for m in self.metrics}

In [12]:
# Define where to save the log
config_name= "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

# log writer for training metrics
train_summary_writer = tf.summary.create_file_writer(train_log_path)

# log writer for validation metrics
val_summary_writer = tf.summary.create_file_writer(val_log_path)

In [13]:
import pprint
import tqdm

def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer):
    
    for epoch in range(epochs):
        print(f"Epoch {epoch}:")
        
        # Training:
        for data in tqdm.tqdm(train_ds, position=0, leave=True):
            metrics = model.train_step(data)
            
            # logging the validation metrics to the log file which is used by tensorboard
            with train_summary_writer.as_default():
                for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)

        # print the metrics
        print([f"{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics (requires a reset_metrics method in the model)
        model.reset_metrics()    
        
        # Validation:
        for data in val_ds:
            metrics = model.test_step(data)
        
            # logging the validation metrics to the log file which is used by tensorboard
            with val_summary_writer.as_default():
                for metric in model.metrics:
                    tf.summary.scalar(f"{metric.name}", metric.result(), step=epoch)
                    
        print([f"val_{key}: {value.numpy()}" for (key, value) in metrics.items()])

        # reset all metrics
        model.reset_metrics()
        print("\n")

In [14]:
# write function to create the dataset that we want
def preprocess(data, batch_size, target_function, depth):
  

  data = data.map(lambda x, t: ((tf.cast(tf.reshape(x, (-1,)), tf.float32)/128.)-1.,t))

  # we want to have two mnist images in each example
  zipped_ds = tf.data.Dataset.zip((data.shuffle(2000), data.shuffle(2000)))
  # for subtask a: map ((x1,y1),(x2,y2)) to (x1,x2, y1+y2 >= 5*) *boolean
  # for loss use binary cross entropy and for activation function sigmoid

  # for subtask b: output layer is vector for values betweeen -9 and +9 -> turn into one_hot_vector
  # for loss use categorical cross entropy and for activation function softmax
  zipped_ds = zipped_ds.map(lambda x1, x2: (x1[0], x2[0], target_function(x1[1], x2[1])))
  zipped_ds = zipped_ds.map(lambda x1, x2, target: (x1, x2, tf.one_hot(target, depth=depth)))

  # batch the dataset
  zipped_ds = zipped_ds.batch(batch_size)
  # prefetch
  zipped_ds = zipped_ds.prefetch(tf.data.AUTOTUNE)

  return zipped_ds

In [15]:
# Initialize the functions for model.
a = lambda x1, x2: int(x1 + x2 >= 5)
b = lambda x1, x2: x1 - x2

# sets different hyperparamters for the different subtasks
# initializes respective model and calls training_loop
def subtask(option):

    # get mnist from tensorflow_datasets
    train_ds, val_ds = tfds.load("mnist", split =["train","test"], as_supervised=True)
    
    # check which model is asked for and initialize corresponding parameters
    # hyperparameters for subtask a
    if option == "a":
        hidden_activation = tf.nn.sigmoid
        output_activation = tf.nn.sigmoid
        optimizer = tf.keras.optimizers.Adam(0.001)
        loss_function = tf.keras.losses.BinaryCrossentropy()
        depth = 1
        train_ds = preprocess(train_ds, batch_size=32, target_function=a, depth=depth) #train_ds.apply(preprocess)
        val_ds = preprocess(val_ds, batch_size=32, target_function=a, depth=depth) #val_ds.apply(preprocess)
        accuracy = tf.keras.metrics.BinaryAccuracy(name="acc")

    # hyperparamteres for subtask b
    if option == "b":
        hidden_activation = tf.nn.relu
        output_activation = tf.nn.sigmoid
        optimizer = tf.keras.optimizers.SGD(0.001)
        loss_function = tf.keras.losses.CategoricalCrossentropy()
        depth=19
        train_ds = preprocess(train_ds, batch_size=32, target_function=b, depth=depth) #train_ds.apply(preprocess)
        val_ds = preprocess(val_ds, batch_size=32, target_function=b, depth=depth) #val_ds.apply(preprocess)
        accuracy = tf.keras.metrics.CategoricalAccuracy(name="acc")


    model = TwinModel(hidden_activation, output_activation, optimizer, accuracy, loss_function, depth)
        
    training_loop(model=model,
                  train_ds=train_ds,
                  val_ds=val_ds,
                  epochs=10,
                  train_summary_writer=train_summary_writer,
                  val_summary_writer=val_summary_writer,
                  )

    print("Training finished.")

In [9]:
subtask("a")

Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to ~/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/4 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to ~/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.
Epoch 0:


100%|██████████| 1875/1875 [00:20<00:00, 91.05it/s] 


['mean: 0.20400238037109375', 'acc: 0.9207666516304016']
['val_mean: 0.14142201840877533', 'val_acc: 0.9456999897956848']


Epoch 1:


100%|██████████| 1875/1875 [00:12<00:00, 144.89it/s]


['mean: 0.12868772447109222', 'acc: 0.9519833326339722']
['val_mean: 0.11819861084222794', 'val_acc: 0.9602000117301941']


Epoch 2:


100%|██████████| 1875/1875 [00:12<00:00, 146.55it/s]


['mean: 0.11427542567253113', 'acc: 0.9591666460037231']
['val_mean: 0.10275159031152725', 'val_acc: 0.9628999829292297']


Epoch 3:


100%|██████████| 1875/1875 [00:13<00:00, 135.46it/s]


['mean: 0.10257577151060104', 'acc: 0.9640499949455261']
['val_mean: 0.10203292220830917', 'val_acc: 0.9621000289916992']


Epoch 4:


100%|██████████| 1875/1875 [00:20<00:00, 91.57it/s] 


['mean: 0.09750419110059738', 'acc: 0.9664999842643738']
['val_mean: 0.10596373677253723', 'val_acc: 0.9635999798774719']


Epoch 5:


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['mean: 0.09654384106397629', 'acc: 0.9685500264167786']
['val_mean: 0.09232468903064728', 'val_acc: 0.9739999771118164']


Epoch 6:


100%|██████████| 1875/1875 [00:12<00:00, 145.88it/s]


['mean: 0.08872833847999573', 'acc: 0.9706666469573975']
['val_mean: 0.09616091102361679', 'val_acc: 0.9695000052452087']


Epoch 7:


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['mean: 0.08915281295776367', 'acc: 0.9711833596229553']
['val_mean: 0.09101276844739914', 'val_acc: 0.9728999733924866']


Epoch 8:


100%|██████████| 1875/1875 [00:20<00:00, 91.52it/s] 


['mean: 0.08452092856168747', 'acc: 0.9735666513442993']
['val_mean: 0.09901247918605804', 'val_acc: 0.9690999984741211']


Epoch 9:


100%|██████████| 1875/1875 [00:20<00:00, 91.57it/s] 


['mean: 0.08291064947843552', 'acc: 0.9743499755859375']
['val_mean: 0.09624452888965607', 'val_acc: 0.9724000096321106']


Training finished.


In [16]:
subtask("b")

Epoch 0:


100%|██████████| 1875/1875 [00:19<00:00, 98.19it/s] 


['mean: 1.251694917678833', 'acc: 0.33079999685287476']
['val_mean: 1.182015299797058', 'val_acc: 0.49540001153945923']


Epoch 1:


100%|██████████| 1875/1875 [00:12<00:00, 148.94it/s]


['mean: 1.1680537462234497', 'acc: 0.5353500247001648']
['val_mean: 1.1658536195755005', 'val_acc: 0.550599992275238']


Epoch 2:


100%|██████████| 1875/1875 [00:20<00:00, 91.58it/s] 


['mean: 1.1488704681396484', 'acc: 0.5539666414260864']
['val_mean: 1.1448982954025269', 'val_acc: 0.5514000058174133']


Epoch 3:


100%|██████████| 1875/1875 [00:12<00:00, 149.66it/s]


['mean: 1.138388991355896', 'acc: 0.5508000254631042']
['val_mean: 1.1241521835327148', 'val_acc: 0.5523999929428101']


Epoch 4:


100%|██████████| 1875/1875 [00:20<00:00, 91.56it/s] 


['mean: 1.124572992324829', 'acc: 0.5499833226203918']
['val_mean: 1.117884874343872', 'val_acc: 0.5382000207901001']


Epoch 5:


100%|██████████| 1875/1875 [00:12<00:00, 151.74it/s]


['mean: 1.1073075532913208', 'acc: 0.4453999996185303']
['val_mean: 1.093227744102478', 'val_acc: 0.2939999997615814']


Epoch 6:


100%|██████████| 1875/1875 [00:12<00:00, 150.80it/s]


['mean: 1.0800666809082031', 'acc: 0.32214999198913574']
['val_mean: 1.071268916130066', 'val_acc: 0.3675000071525574']


Epoch 7:


100%|██████████| 1875/1875 [00:12<00:00, 148.51it/s]


['mean: 1.0596716403961182', 'acc: 0.382750004529953']
['val_mean: 1.046690583229065', 'val_acc: 0.4207000136375427']


Epoch 8:


100%|██████████| 1875/1875 [00:20<00:00, 91.57it/s] 


['mean: 1.038266897201538', 'acc: 0.42551666498184204']
['val_mean: 1.030623435974121', 'val_acc: 0.48410001397132874']


Epoch 9:


100%|██████████| 1875/1875 [00:20<00:00, 91.57it/s] 


['mean: 1.0276178121566772', 'acc: 0.44448333978652954']
['val_mean: 1.0045217275619507', 'val_acc: 0.4571000039577484']


Training finished.
