In [1]:
"""
@authors: faurand, chardes, ehagensieker
"""
import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np 

# Fetch MNIST once and keep its two splits; as_supervised=True yields
# (image, label) tuples instead of feature dictionaries.
train_ds, test_ds = tfds.load(
    'mnist',
    split=['train', 'test'],
    as_supervised=True,
)


Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to ~/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/4 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to ~/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.


In [2]:
import math
import datetime

# in a notebook, load the tensorboard extension, not needed for scripts
%load_ext tensorboard

In [7]:
def calc(task,mnist):
  '''
  Turn a dataset of paired samples into (img1, img2, target) triples for one
  of the two subtasks, which differ only in their target values.

  Args: 
    task(int): which subtask to build the target for
      1: target is 1 if the two labels sum to at least 5, otherwise 0
      2: target is the first label minus the second label
    mnist(dataset): dataset exposing `.map` whose elements are pairs
      ((img1, label1), (img2, label2))
  Return: 
    mnist(dataset): dataset of triples (img1, img2, target), target cast to int32
  Raises:
    ValueError: if task is neither 1 nor 2
  '''
  if task == 1:
    return mnist.map(lambda pic1,pic2: (pic1[0],pic2[0],tf.cast((pic1[1] + pic2[1] >= 5), tf.int32)))
  if task == 2:
    return mnist.map(lambda pic1,pic2: (pic1[0],pic2[0], tf.cast((pic1[1]-pic2[1]), tf.int32)))
  # Fail here instead of handing None to the caller, where the mistake would
  # only surface later as an unrelated AttributeError.
  raise ValueError(f"Unknown task {task!r}: expected 1 or 2")




In [8]:
def prepare_mnist_data(task,batch_size,mnist):
  '''
  Build the input pipeline for one subtask from a dataset of (image, label) samples.

  Args:
    task(int): subtask whose target is computed by calc
    batch_size(int): number of triples per batch
    mnist(tf.data.Dataset): dataset of (image, label) tuples
  Return:
    batches(tf.data.Dataset): batched (img1, img2, target) triples
  '''
  def _flatten_and_scale(img, target):
    #flatten to a one dimensional vector and convert from uint8 to float32
    flat = tf.cast(tf.reshape(img, (-1,)), tf.float32)
    #rescale: 0 maps to -1, 255 maps to just below 1
    return (flat/128.)-1., target

  samples = mnist.map(_flatten_and_scale)

  #pair every sample with one drawn from a second, independent shuffle
  pairs = tf.data.Dataset.zip((samples.shuffle(2000), samples.shuffle(2000)))

  #create the triple for the requested task and keep it in memory
  triples = calc(task,pairs).cache()

  #shuffle, batch, prefetch
  return triples.shuffle(1000).batch(batch_size).prefetch(20)

#apply the preprocessing to both data sets
# batch_size = 32
# train_1 = prepare_mnist_data(1,batch_size,train_ds)
# test_1 = prepare_mnist_data(1,batch_size,test_ds)

# train_2 = prepare_mnist_data(2,batch_size,train_ds)
# test_2 = prepare_mnist_data(2,batch_size,test_ds)

In [9]:
#have a look at the triple 
# for i in train_1.take(1):
#   print("pic1:", i[0], "\npic2: ", i[1], "\ntarget: ",i[2], i[2].shape)

pic1: tf.Tensor(
[[-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 ...
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]], shape=(32, 784), dtype=float32) 
pic2:  tf.Tensor(
[[-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 ...
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]
 [-1. -1. -1. ... -1. -1. -1.]], shape=(32, 784), dtype=float32) 
target:  tf.Tensor([0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1], shape=(32,), dtype=int32) (32,)


In [23]:
"""
@authors: faurand, chardes, ehagensieker
"""
class MyModel(tf.keras.Model):
  """
  Dense network that predicts one value from a pair of images.

  Both images are passed through the same two hidden layers (the weights are
  shared), the two results are concatenated and fed to a one-unit output layer.
  """ 
  def __init__(self, task=1,optimizer=None):
    """
    Initializing the model 

    Args:
      task(int): 1 -> sigmoid output, binary cross-entropy loss, binary accuracy metric
                 2 -> linear output, mean squared error loss, MSLE metric
      optimizer: optimizer used by train_step; a new Adam optimizer is
        created when None is given
    Raises:
      ValueError: if task is neither 1 nor 2
    """
    #inherit from parent class
    super(MyModel, self).__init__()

    #depending on the task to perform we need a different loss, a different
    #metric and a different activation function for the output layer
    if task==1:
      task_metric = tf.keras.metrics.BinaryAccuracy(name = "acc")
      self.loss_function = tf.keras.losses.BinaryCrossentropy()
      out_activation = tf.nn.sigmoid
    elif task==2:
      task_metric = tf.keras.metrics.MeanSquaredLogarithmicError(name = "MSLE")
      self.loss_function = tf.keras.losses.MeanSquaredError()
      out_activation = "linear"
    else:
      #fail immediately instead of leaving a model without layers behind
      raise ValueError(f"Unknown task {task!r}: expected 1 or 2")

    #first entry tracks the mean loss, the remaining entries compare targets and outputs
    self.metrics_list = [tf.keras.metrics.Mean(name = 'loss'), task_metric]

    #the hidden layers are the same for both tasks
    self.dense1 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
    self.out_layer = tf.keras.layers.Dense(1, activation=out_activation)

    #build the default optimizer per instance: a default created in the signature
    #would be one single object shared by every model that relies on it
    self.optimizer = optimizer if optimizer is not None else tf.keras.optimizers.Adam()

  @tf.function 
  def call(self, images):
    """
    how to forward the images through the layer

    Args:
      images(tuple): pair (img1, img2) of image batches
    Return:
      out: output of the one-unit output layer for every pair
    """
    img1, img2 = images 

    #same layers for both images -> shared weights
    img1_x = self.dense2(self.dense1(img1))
    img2_x = self.dense2(self.dense1(img2))

    combined = tf.concat([img1_x, img2_x], axis = 1)
    return self.out_layer(combined)

  @property
  def metrics(self):
    """The tracked metrics: mean loss first, then the task specific metric."""
    return self.metrics_list


  def reset_metric(self):
    """Reset the accumulated state of every tracked metric."""
    for metric in self.metrics: 
      #reset_states is the deprecated name of reset_state; use whichever exists
      reset = getattr(metric, "reset_state", None) or metric.reset_states
      reset()

  def _update_metrics(self, loss, t, output):
    """Feed the loss to the first metric and (target, output) to all others."""
    self.metrics[0].update_state(loss)

    for metric in self.metrics[1:]:
      metric.update_state(t,output)

    return {metric.name: metric.result() for metric in self.metrics}

  @tf.function 
  def train_step(self, data):
    """
    One optimization step on a batch.

    Args:
      data(tuple): triple (img1, img2, target)
    Return:
      dict mapping each metric name to its current result
    """
    img1, img2, t = data

    with tf.GradientTape() as tape:
      output = self((img1, img2), training = True)
      loss = self.loss_function(t, output)
    
    gradients = tape.gradient(loss, self.trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

    return self._update_metrics(loss, t, output)

  @tf.function
  def test_step(self, data):
    """
    Evaluate one batch without updating the weights.

    Args:
      data(tuple): triple (img1, img2, target)
    Return:
      dict mapping each metric name to its current result
    """
    img1, img2, t = data

    output = self((img1,img2), training = False)
    loss = self.loss_function(t, output)

    return self._update_metrics(loss, t, output)


    


In [11]:
import tqdm
import pprint 

def _log_metrics(summary_writer, metrics, step):
  """Write every entry of a {name: value} metrics dict as a scalar at the given step."""
  with summary_writer.as_default():
    for name, value in metrics.items():
      tf.summary.scalar(f"{name}", value, step=step)


def training_loop(model, train_ds, val_ds, epochs, train_summary_writer, val_summary_writer): 
  """
  Train and validate a model for a number of epochs, printing and logging the metrics.

  Args:
    model: model offering train_step(data) and test_step(data), both returning a
      dict of metric name -> result, and reset_metrics()
    train_ds: iterable of training batches
    val_ds: iterable of validation batches
    epochs(int): number of passes over train_ds
    train_summary_writer: summary writer receiving the training metrics
    val_summary_writer: summary writer receiving the validation metrics
  """
  #iterate over given amount of epochs
  for epoch in range(epochs): 
    print(f"Epoch {epoch}: ")

    #train on all batches of the training data
    metrics = {}  #stays empty if the dataset yields no batch
    for data in tqdm.tqdm(train_ds, position = 0, leave = True):
      metrics = model.train_step(data)

    #one summary point per epoch: the metrics accumulate over the whole epoch, so
    #the values after the last batch are the epoch results. Writing them after
    #every batch would emit many events that all carry the same step.
    _log_metrics(train_summary_writer, metrics, epoch)

    # print the metrics
    print([f"Train {key}: {value.numpy()}" for (key, value) in metrics.items()])

    #reset metric 
    model.reset_metrics()
  
    #evaluation on validation set
    metrics = {}
    for data in val_ds:
      metrics = model.test_step(data)

    _log_metrics(val_summary_writer, metrics, epoch)
      
    # print the metrics
    print([f"Test {key}: {value.numpy()}" for (key, value) in metrics.items()])

    #reset metric
    model.reset_metrics()

    print("\n")
      
    

In [26]:
def run(task, train_summary_writer, val_summary_writer, optimizer=None, num_epochs=5, batch_size=32):
  """
  Prepare the data for one subtask, build a model and train it.

  Args:
    task(int): subtask to solve, 1 or 2
    train_summary_writer: summary writer receiving the training metrics
    val_summary_writer: summary writer receiving the validation metrics
    optimizer: optimizer for the model; a new Adam optimizer is created when
      None is given
    num_epochs(int): number of training epochs
    batch_size(int): number of samples per batch
  Raises:
    ValueError: if task is neither 1 nor 2
  """
  if task not in (1, 2):
    raise ValueError(f"Unknown task {task!r}: expected 1 or 2")

  #create the default here, not in the signature: a default built in the signature
  #is a single optimizer object that every call without an optimizer would share
  if optimizer is None:
    optimizer = tf.keras.optimizers.Adam()

  train_dataset = prepare_mnist_data(task,batch_size,train_ds)
  test_dataset = prepare_mnist_data(task,batch_size,test_ds)

  # Initialize the model.
  model = MyModel(task,optimizer)

  training_loop(model,train_dataset,test_dataset,num_epochs,train_summary_writer, val_summary_writer)

In [19]:
#name of the configuration and time stamp, both become part of the log directory
config_name = "config_name"
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

#training and validation summaries go to sibling folders
train_log_path = f"logs/{config_name}/{current_time}/train"
val_log_path = f"logs/{config_name}/{current_time}/val"

#log writers, created in the order train, val
train_summary_writer, val_summary_writer = [
    tf.summary.create_file_writer(log_path)
    for log_path in (train_log_path, val_log_path)
]




In [27]:
#every experiment gets its own log directory and its own optimizer instance:
#all runs count their steps from 0, so sharing one pair of writers would mix
#the curves of different runs in TensorBoard, and an optimizer keeps state
#that must not carry over from one model to the next
experiments = [
    ("Task 1, optimizer=SGD, learning_rate=0.001\n", "task1_sgd", 1,
     lambda: tf.keras.optimizers.SGD(learning_rate=0.001)),
    ("\nTask 1, optimizer=Adam\n", "task1_adam", 1,
     lambda: tf.keras.optimizers.Adam()),
    ("Task 2, optimizer=SGD, learning_rate=0.001\n", "task2_sgd", 2,
     lambda: tf.keras.optimizers.SGD(learning_rate=0.001)),
    ("\nTask 2, optimizer=Adam\n", "task2_adam", 2,
     lambda: tf.keras.optimizers.Adam()),
]

for headline, run_name, task, make_optimizer in experiments:
  print(headline)
  run_train_writer = tf.summary.create_file_writer(f"logs/{run_name}/{current_time}/train")
  run_val_writer = tf.summary.create_file_writer(f"logs/{run_name}/{current_time}/val")
  run(task, run_train_writer, run_val_writer, make_optimizer())


Task 2, optimizer=Adam

Epoch 0: 


100%|██████████| 1875/1875 [00:40<00:00, 45.76it/s] 


['Train loss: 2.95695161819458', 'Train MSLE: 0.15602430701255798']
['Test loss: 1.9282146692276', 'Test MSLE: 0.1061551421880722']


Epoch 1: 


100%|██████████| 1875/1875 [00:12<00:00, 146.47it/s]


['Train loss: 1.5113239288330078', 'Train MSLE: 0.08273565769195557']
['Test loss: 1.4748609066009521', 'Test MSLE: 0.08348338305950165']


Epoch 2: 


100%|██████████| 1875/1875 [00:20<00:00, 91.62it/s] 


['Train loss: 1.1835880279541016', 'Train MSLE: 0.06583007425069809']
['Test loss: 1.254608392715454', 'Test MSLE: 0.06959816813468933']


Epoch 3: 


100%|██████████| 1875/1875 [00:12<00:00, 146.62it/s]


['Train loss: 1.0046085119247437', 'Train MSLE: 0.0568152517080307']
['Test loss: 1.1661521196365356', 'Test MSLE: 0.06374502927064896']


Epoch 4: 


100%|██████████| 1875/1875 [00:20<00:00, 91.63it/s] 


['Train loss: 0.8588299751281738', 'Train MSLE: 0.04887939989566803']
['Test loss: 1.07757568359375', 'Test MSLE: 0.05715864524245262']


Epoch 5: 


100%|██████████| 1875/1875 [00:20<00:00, 91.61it/s] 


['Train loss: 0.7793079018592834', 'Train MSLE: 0.04429255798459053']
['Test loss: 1.0395427942276', 'Test MSLE: 0.05631794035434723']


Epoch 6: 


100%|██████████| 1875/1875 [00:20<00:00, 91.63it/s] 


['Train loss: 0.6993036270141602', 'Train MSLE: 0.04099111258983612']
['Test loss: 0.9624702334403992', 'Test MSLE: 0.049851562827825546']


Epoch 7: 


100%|██████████| 1875/1875 [00:20<00:00, 91.61it/s] 


['Train loss: 0.6261124610900879', 'Train MSLE: 0.03669197857379913']
['Test loss: 0.9438563585281372', 'Test MSLE: 0.05096692591905594']


Epoch 8: 


100%|██████████| 1875/1875 [00:20<00:00, 91.62it/s] 


['Train loss: 0.5640623569488525', 'Train MSLE: 0.03368149325251579']
['Test loss: 1.0401047468185425', 'Test MSLE: 0.0533425398170948']


Epoch 9: 


100%|██████████| 1875/1875 [00:12<00:00, 149.38it/s]


['Train loss: 0.5368348956108093', 'Train MSLE: 0.03177974745631218']
['Test loss: 1.0000885725021362', 'Test MSLE: 0.05349073186516762']


Epoch 10: 


100%|██████████| 1875/1875 [00:20<00:00, 91.62it/s] 


['Train loss: 0.4928482174873352', 'Train MSLE: 0.029231611639261246']
['Test loss: 0.9786913394927979', 'Test MSLE: 0.05111848935484886']


Epoch 11: 


100%|██████████| 1875/1875 [00:20<00:00, 91.61it/s] 


['Train loss: 0.4623214900493622', 'Train MSLE: 0.027642900124192238']
['Test loss: 0.8883705139160156', 'Test MSLE: 0.0463411845266819']


Epoch 12: 


100%|██████████| 1875/1875 [00:20<00:00, 91.62it/s] 


['Train loss: 0.4246049225330353', 'Train MSLE: 0.025830697268247604']
['Test loss: 0.9230616688728333', 'Test MSLE: 0.04527357220649719']


Epoch 13: 


100%|██████████| 1875/1875 [00:12<00:00, 148.79it/s]


['Train loss: 0.42106378078460693', 'Train MSLE: 0.026334106922149658']
['Test loss: 0.9332844018936157', 'Test MSLE: 0.04890970513224602']


Epoch 14: 


100%|██████████| 1875/1875 [00:20<00:00, 91.63it/s] 


['Train loss: 0.3841831684112549', 'Train MSLE: 0.0235282015055418']
['Test loss: 0.9159929156303406', 'Test MSLE: 0.04683277755975723']


